continue icon indicating copy to clipboard operation
continue copied to clipboard

Add support for Extended Caching for Anthropic.

Open md2k opened this issue 4 months ago • 0 comments

Validations

  • [x] I believe this is a way to improve. I'll try to join the Continue Discord for questions
  • [x] I'm not able to find an open issue that requests the same enhancement

Problem

Anthropic added support for extended caching capabilities: their Beta feature allows caching context not only for 5 minutes but for 1 hour, which can improve the coding and chat experience with large contexts, because the 5-minute cache does not always play well.

Official docs: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration

Solution

As a solution I tried to create a simple patch, but unfortunately I was not able to build the extension under my macOS M3 due to multiple errors.

Patch content under the cut
diff --git a/core/index.d.ts b/core/index.d.ts
index ce78a8d82..a6cf6d3b2 100644
--- a/core/index.d.ts
+++ b/core/index.d.ts
@@ -926,6 +926,8 @@ export interface RequestOptions {
 export interface CacheBehavior {
   cacheSystemMessage?: boolean;
   cacheConversation?: boolean;
+  useExtendedCacheTtlBeta?: boolean;
+  cacheTtl?: string;
 }

 export interface ClientCertificateOptions {
diff --git a/core/llm/llms/Anthropic.ts b/core/llm/llms/Anthropic.ts
index 8d1d3a67f..759157d87 100644
--- a/core/llm/llms/Anthropic.ts
+++ b/core/llm/llms/Anthropic.ts
@@ -100,7 +100,11 @@ class Anthropic extends BaseLLM {
           {
             type: "text",
             text: message.content,
-            ...(addCaching ? { cache_control: { type: "ephemeral" } } : {}),
+            ...(addCaching
+              ? { cache_control: this.cacheBehavior?.useExtendedCacheTtlBeta
+                ? { type: "ephemeral", ttl: this.cacheBehavior?.cacheTtl ?? "5m" }
+                : { type: "ephemeral" } }
+              : {}),
           },
         ],
       };
@@ -115,7 +119,9 @@ class Anthropic extends BaseLLM {
             ...part,
             // If multiple text parts, only add cache_control to the last one
             ...(addCaching && contentIdx === message.content.length - 1
-              ? { cache_control: { type: "ephemeral" } }
+              ? { cache_control: this.cacheBehavior?.useExtendedCacheTtlBeta
+                ? { type: "ephemeral", ttl: this.cacheBehavior?.cacheTtl ?? "5m" }
+                : { type: "ephemeral" } }
               : {}),
           };
           return newpart;
@@ -194,9 +200,12 @@ class Anthropic extends BaseLLM {
         Accept: "application/json",
         "anthropic-version": "2023-06-01",
         "x-api-key": this.apiKey as string,
-        ...(shouldCacheSystemMessage || this.cacheBehavior?.cacheConversation
-          ? { "anthropic-beta": "prompt-caching-2024-07-31" }
-          : {}),
+        ...(this.cacheBehavior?.useExtendedCacheTtlBeta
+          ? { "anthropic-beta": "extended-cache-ttl-2025-04-11" }
+          : (shouldCacheSystemMessage || this.cacheBehavior?.cacheConversation
+            ? { "anthropic-beta": "prompt-caching-2024-07-31" }
+            : {})
+          ),
       },
       body: JSON.stringify({
         ...this.convertArgs(options),
@@ -206,7 +215,9 @@ class Anthropic extends BaseLLM {
               {
                 type: "text",
                 text: systemMessage,
-                cache_control: { type: "ephemeral" },
+                cache_control: this.cacheBehavior?.useExtendedCacheTtlBeta
+                  ? { type: "ephemeral", ttl: this.cacheBehavior?.cacheTtl ?? "5m" }
+                  : { type: "ephemeral" },
               },
             ]
           : systemMessage,
diff --git a/docs/docs/customize/model-providers/top-level/anthropic.mdx b/docs/docs/customize/model-providers/top-level/anthropic.mdx
index d7d1e325c..4b0d1b3eb 100644
--- a/docs/docs/customize/model-providers/top-level/anthropic.mdx
+++ b/docs/docs/customize/model-providers/top-level/anthropic.mdx
@@ -60,12 +60,17 @@ Anthropic currently does not offer any reranking models.

 ## Prompt caching

-Anthropic supports [prompt caching with Claude](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching).
+Anthropic supports [prompt caching with Claude](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching) and
+as part of their `Beta` support [Extended caching](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching#1-hour-cache-duration)
+
+* `useExtendedCacheTtlBeta` - if `true`, enables the Beta feature and allows setting a `5m` or `1h` TTL for caching
+* `cacheTtl` - accepts only `5m` or `1h` as values; if the parameter is not set, the default `5m` cache TTL is used.

 To enable caching of the system message and the turn-by-turn conversation, update your your model configuration as following:

 <Tabs groupId="config-example">
   <TabItem value="yaml" label="YAML">
+
   ```yaml title="config.yaml"
   models:
     - name: Anthropic
@@ -77,16 +82,21 @@ To enable caching of the system message and the turn-by-turn conversation, updat
       cacheBehavior:
         cacheSystemMessage: true
         cacheConversation: true
+        useExtendedCacheTtlBeta: true
+        cacheTtl: "1h"
  ```
  </TabItem>
  <TabItem value="json" label="JSON">
  ```json title="config.json"
  {
    "models": [
      {
        "cacheBehavior": {
          "cacheSystemMessage": true,
-         "cacheConversation": true
+         "cacheConversation": true,
+         "useExtendedCacheTtlBeta": true,
+         "cacheTtl": "1h"
        },
        "title": "Anthropic",
        "provider": "anthropic",
diff --git a/packages/config-yaml/src/schemas/models.ts b/packages/config-yaml/src/schemas/models.ts
index 893f7117a..5b531aa34 100644
--- a/packages/config-yaml/src/schemas/models.ts
+++ b/packages/config-yaml/src/schemas/models.ts
@@ -69,6 +69,8 @@ export type EmbeddingPrefixes = z.infer<typeof embeddingPrefixesSchema>;
 export const cacheBehaviorSchema = z.object({
   cacheSystemMessage: z.boolean().optional(),
   cacheConversation: z.boolean().optional(),
+  useExtendedCacheTtlBeta: z.boolean().optional(),
+  cacheTtl: z.enum(["5m", "1h"]).optional(),
 });
 export type CacheBehavior = z.infer<typeof cacheBehaviorSchema>;
```

md2k avatar Jun 15 '25 21:06 md2k