Bedrock Claude 3.5 Sonnet v2 is not supporting new attachments (PDF)
I use Anthropic's Claude models via Amazon Bedrock. I wanted to try the new Claude 3.5 Sonnet v2 model with PDF attachment support, but it's not working. I am using Python and tried both boto3 (the bedrock-runtime client) and anthropic.AnthropicBedrock. I could not find any documentation from either AWS or Anthropic on how to attach a PDF file, so I'm working by trial and error.
Boto
For example, I tried the Bedrock playground and it works in the UI. I exported the messages as JSON (see screenshot) and then coded the same request in Python using boto3.
import os
import json
from io import BytesIO
from base64 import b64encode
from dotenv import load_dotenv
import boto3
from botocore.config import Config
from anthropic import AnthropicBedrock
def query_claude_v2():
    """Send a text prompt plus a PDF to Claude 3.5 Sonnet v2 through boto3.

    Loads environment variables with ``load_dotenv()``, reads
    ``release/data/11.pdf``, and builds a single user message holding a
    text item and a ``document`` item. The message is posted with
    ``invoke_model`` on a ``bedrock-runtime`` client whose region and
    credentials come from ``os.environ``.

    Returns:
        The ``text`` field of the first entry in the response's
        ``content`` list.

    Raises:
        KeyError: If ``region``, ``AWS_ACCESS_KEY_ID``,
            ``AWS_SECRET_ACCESS_KEY`` or ``AWS_SESSION_TOKEN`` is not set.
    """
    load_dotenv()
    prompt = "extract this file"
    input_pdf = "release/data/11.pdf"

    with open(input_pdf, "rb") as pdf_file:
        pdf_bytes = pdf_file.read()

    text_item = {"type": "text", "text": prompt}
    document_item = {
        "type": "document",
        "attrs": {
            "format": "pdf",
            "name": "11.pdf",
            # The raw bytes are sent as a JSON array of integers.
            "source": {"bytes": list(pdf_bytes)},
        },
    }
    message_content = [text_item, document_item]

    runtime = boto3.Session().client(
        service_name="bedrock-runtime",
        region_name=os.environ["region"],
        aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
        aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
        aws_session_token=os.environ["AWS_SESSION_TOKEN"],
        config=Config(read_timeout=600),  # 10 minutes
    )

    payload = json.dumps(
        {
            "anthropic_version": "bedrock-2023-05-31",
            "max_tokens": 8192,
            "messages": [{"role": "user", "content": message_content}],
            "temperature": 0.2,
        }
    )
    raw_response = runtime.invoke_model(
        modelId="anthropic.claude-3-5-sonnet-20241022-v2:0",
        body=payload,
    )

    parsed = json.loads(raw_response["body"].read())
    return parsed["content"][0]["text"]
print(query_claude_v2())
It gives me this error:
botocore.errorfactory.ValidationException: An error occurred (ValidationException) when calling the InvokeModel operation: messages.0.content.1: Input tag 'document' found using 'type' does not match any of the expected tags: 'text', 'image', 'tool_use', 'tool_result'
Maybe AWS needs to add this support in their validator.
Anthropic
I also tried the Anthropic SDK for Python with AnthropicBedrock. I based the code on PR #721 and wrote the equivalent in Python.
import os
import json
from io import BytesIO
from base64 import b64encode
from dotenv import load_dotenv
import boto3
from botocore.config import Config
from anthropic import AnthropicBedrock
def query_claude_v2_anthropic():
    """Send a text prompt plus a PDF to Claude 3.5 Sonnet v2 via AnthropicBedrock.

    Loads environment variables with ``load_dotenv()``, reads
    ``release/data/11.pdf``, base64-encodes it, and submits one user
    message (a text item followed by a ``document`` item) through
    ``messages.create``.

    Returns:
        The ``text`` attribute of the first item in ``response.content``.

    Raises:
        KeyError: If ``AWS_ACCESS_KEY_ID``, ``AWS_SECRET_ACCESS_KEY``,
            ``AWS_SESSION_TOKEN`` or ``region`` is not set.
    """
    load_dotenv()
    prompt = "extract this file"
    input_pdf = "release/data/11.pdf"

    with open(input_pdf, "rb") as handle:
        pdf_stream = BytesIO(handle.read())
    encoded_pdf = b64encode(pdf_stream.getvalue()).decode("utf-8")

    bedrock_client = AnthropicBedrock(
        aws_access_key=os.environ["AWS_ACCESS_KEY_ID"],
        aws_secret_key=os.environ["AWS_SECRET_ACCESS_KEY"],
        aws_session_token=os.environ["AWS_SESSION_TOKEN"],
        aws_region=os.environ["region"],
        timeout=600,
    )

    document_item = {
        "type": "document",
        "source": {
            "type": "base64",
            "media_type": "application/pdf",
            "data": encoded_pdf,
        },
    }
    user_message = {
        "role": "user",
        "content": [{"type": "text", "text": prompt}, document_item],
    }

    response = bedrock_client.messages.create(
        max_tokens=8192,
        messages=[user_message],
        model="anthropic.claude-3-5-sonnet-20241022-v2:0",
        temperature=0.2,
    )
    return response.content[0].text
print(query_claude_v2_anthropic())
It gives me this error:
anthropic.BadRequestError: Error code: 400 - {'message': "messages.0.content.1: Input tag 'document' found using 'type' does not match any of the expected tags: 'text', 'image', 'tool_use', 'tool_result'"}
@ac-shivamaggarwal PDF support is in beta so you need to explicitly enable it, try changing your example snippet to use the .beta.messages.create() method and set betas to include PDFs like so
bedrock_client.beta.messages.create(
betas=['pdfs-2024-09-25'],
...
)
Thank you @RobertCraigie, I have modified both functions to include anthropic-beta headers but I am still getting the same error
import os
import json
from io import BytesIO
from base64 import b64encode
from dotenv import load_dotenv
import boto3
from botocore.config import Config
from anthropic import AnthropicBedrock
def query_claude_v2():
    """Send a prompt plus a PDF through boto3 with an ``anthropic-beta`` header.

    Works like a plain ``invoke_model`` call, except that a
    ``before-send`` event handler is registered on the client first. The
    handler sets the ``anthropic-beta`` header to ``pdfs-2024-09-25`` on
    the outgoing request and prints the request object.

    Returns:
        The ``text`` field of the first entry in the response's
        ``content`` list.

    Raises:
        KeyError: If ``region``, ``AWS_ACCESS_KEY_ID``,
            ``AWS_SECRET_ACCESS_KEY`` or ``AWS_SESSION_TOKEN`` is not set.
    """
    load_dotenv()
    prompt = "extract this file"
    input_pdf = "/Users/shivam/Downloads/11-1-43-9-11.pdf"

    with open(input_pdf, "rb") as pdf_file:
        pdf_bytes = pdf_file.read()

    message_content = [
        {"type": "text", "text": prompt},
        {
            "type": "document",
            "attrs": {
                "format": "pdf",
                # The raw bytes are sent as a JSON array of integers.
                "source": {"bytes": list(pdf_bytes)},
            },
        },
    ]

    runtime = boto3.Session().client(
        service_name="bedrock-runtime",
        region_name=os.environ["region"],
        aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
        aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
        aws_session_token=os.environ["AWS_SESSION_TOKEN"],
        config=Config(read_timeout=600),  # 10 minutes
    )

    # The parameter must stay named ``request``: botocore passes event
    # arguments to handlers by keyword.
    def add_custom_headers(request, **kwargs):
        request.headers['anthropic-beta'] = 'pdfs-2024-09-25'
        print(request)

    runtime.meta.events.register(
        'before-send.bedrock-runtime.InvokeModel', add_custom_headers
    )

    payload = json.dumps(
        {
            "anthropic_version": "bedrock-2023-05-31",
            "max_tokens": 8192,
            "messages": [{"role": "user", "content": message_content}],
            "temperature": 0.2,
        }
    )
    raw_response = runtime.invoke_model(
        modelId="anthropic.claude-3-5-sonnet-20241022-v2:0",
        body=payload,
    )

    parsed = json.loads(raw_response["body"].read())
    return parsed["content"][0]["text"]
def query_claude_v2_anthropic():
    """Send a prompt plus a PDF via AnthropicBedrock's beta messages endpoint.

    Loads environment variables with ``load_dotenv()``, reads the PDF,
    base64-encodes it, and submits one user message (a text item followed
    by a ``document`` item) through ``beta.messages.create`` with
    ``betas=['pdfs-2024-09-25']``.

    Returns:
        The ``text`` attribute of the first item in ``response.content``.

    Raises:
        KeyError: If ``AWS_ACCESS_KEY_ID``, ``AWS_SECRET_ACCESS_KEY``,
            ``AWS_SESSION_TOKEN`` or ``region`` is not set.
    """
    load_dotenv()
    prompt = "extract this file"
    input_pdf = "/Users/shivam/Downloads/11-1-43-9-11.pdf"

    with open(input_pdf, "rb") as handle:
        pdf_stream = BytesIO(handle.read())
    encoded_pdf = b64encode(pdf_stream.getvalue()).decode("utf-8")

    bedrock_client = AnthropicBedrock(
        aws_access_key=os.environ["AWS_ACCESS_KEY_ID"],
        aws_secret_key=os.environ["AWS_SECRET_ACCESS_KEY"],
        aws_session_token=os.environ["AWS_SESSION_TOKEN"],
        aws_region=os.environ["region"],
        timeout=600,
    )

    document_item = {
        "type": "document",
        "source": {
            "type": "base64",
            "media_type": "application/pdf",
            "data": encoded_pdf,
        },
    }
    user_message = {
        "role": "user",
        "content": [{"type": "text", "text": prompt}, document_item],
    }

    response = bedrock_client.beta.messages.create(
        max_tokens=8192,
        messages=[user_message],
        model="anthropic.claude-3-5-sonnet-20241022-v2:0",
        temperature=0.2,
        betas=['pdfs-2024-09-25'],
    )
    return response.content[0].text
# print(query_claude_v2())
print(query_claude_v2_anthropic())
Error
anthropic.BadRequestError: Error code: 400 - {'message': "messages.0.content.1: Input tag 'document' found using 'type' does not match any of the expected tags: 'text', 'image', 'tool_use', 'tool_result'"}
I suppose AWS or Anthropic is running some kind of validation and is not allowing the type: document content type.
Any updates on this? I am getting the same error.
The official documentation states pdf support is coming soon to bedrock.
Supported platforms and models: PDF support is currently available on Claude 3.7 Sonnet (claude-3-7-sonnet-20250219), both Claude 3.5 Sonnet models (claude-3-5-sonnet-20241022, claude-3-5-sonnet-20240620), and Claude 3.5 Haiku (claude-3-5-haiku-20241022) via direct API access and Google Vertex AI. This functionality will be supported on Amazon Bedrock soon.
https://docs.anthropic.com/en/docs/build-with-claude/pdf-support
any update on this?
This should now be supported: https://aws.amazon.com/about-aws/whats-new/2025/06/citations-api-pdf-claude-models-amazon-bedrock/