codeflash-internal/.tessl/tiles/tessl/npm-anthropic-ai--sdk/docs/messages.md
2026-02-15 03:56:05 -05:00

22 KiB

Messages API

The Messages API is the core interface for conversational interactions with Claude models. It supports text, images, documents, tool use, thinking, prompt caching, and both streaming and non-streaming modes.

Overview

The Messages API allows you to:

  • Send structured conversations with multiple turns
  • Include text, images, and document content
  • Use tools (function calling) for extended capabilities
  • Stream responses in real-time
  • Cache prompts for efficient repeated use
  • Access extended thinking traces
  • Count tokens before creating messages

API Reference

class Messages extends APIResource {
  // Sub-resources
  batches: Batches;

  // Methods
  create(params: MessageCreateParamsNonStreaming, options?: RequestOptions): APIPromise<Message>;
  create(params: MessageCreateParamsStreaming, options?: RequestOptions): APIPromise<Stream<RawMessageStreamEvent>>;
  stream(params: MessageStreamParams, options?: RequestOptions): MessageStream;
  countTokens(params: MessageCountTokensParams, options?: RequestOptions): APIPromise<MessageTokensCount>;
}

Creating Messages

Basic Message Creation

client.messages.create(params: MessageCreateParams): APIPromise<Message>;

interface MessageCreateParams {
  model: string;              // Required: Model identifier
  max_tokens: number;         // Required: Maximum tokens to generate
  messages: MessageParam[];   // Required: Conversation messages

  // Optional parameters
  stream?: boolean;           // Enable streaming
  system?: string | SystemBlockParam[];  // System prompt
  temperature?: number;       // 0-1, default varies by model
  top_k?: number;            // Top-k sampling
  top_p?: number;            // 0-1, nucleus sampling
  stop_sequences?: string[]; // Stop generation on these sequences
  metadata?: Metadata;       // User-defined metadata
  tools?: Tool[];            // Available tools
  tool_choice?: ToolChoice;  // Tool selection strategy
  thinking?: ThinkingConfigParam;  // Extended thinking configuration
  // Note: citations are enabled per document content block (see DocumentBlockParam), not as a top-level param
}

Example:

import Anthropic from '@anthropic-ai/sdk';

const client = new Anthropic();

const message = await client.messages.create({
  model: 'claude-sonnet-4-5-20250929',
  max_tokens: 1024,
  messages: [
    {
      role: 'user',
      content: 'What is the capital of France?'
    }
  ],
});

console.log(message.content[0].text); // "The capital of France is Paris."

Multi-Turn Conversations

const message = await client.messages.create({
  model: 'claude-sonnet-4-5-20250929',
  max_tokens: 1024,
  messages: [
    {
      role: 'user',
      content: 'Hello, my name is Alice.'
    },
    {
      role: 'assistant',
      content: 'Hello Alice! Nice to meet you. How can I help you today?'
    },
    {
      role: 'user',
      content: 'What did I just tell you my name was?'
    }
  ],
});

console.log(message.content[0].text); // "You told me your name is Alice."

System Prompts

System prompts guide Claude's behavior:

const message = await client.messages.create({
  model: 'claude-sonnet-4-5-20250929',
  max_tokens: 1024,
  system: 'You are a helpful assistant that speaks like a pirate.',
  messages: [
    {
      role: 'user',
      content: 'Tell me about the weather.'
    }
  ],
});

Advanced system with cache control:

const message = await client.messages.create({
  model: 'claude-sonnet-4-5-20250929',
  max_tokens: 1024,
  system: [
    {
      type: 'text',
      text: 'You are an expert in quantum physics.',
    },
    {
      type: 'text',
      text: 'Here is a large corpus of physics papers...',
      cache_control: { type: 'ephemeral' }, // Cache this content
    }
  ],
  messages: [
    {
      role: 'user',
      content: 'Explain quantum entanglement.'
    }
  ],
});

Message Structure

MessageParam (Input)

interface MessageParam {
  role: 'user' | 'assistant';
  content: string | ContentBlockParam[];
}

// Simple text content
const textMessage: MessageParam = {
  role: 'user',
  content: 'Hello!',
};

// Structured content blocks
const blockMessage: MessageParam = {
  role: 'user',
  content: [{ type: 'text', text: 'Hello!' }],
};

Message (Output)

interface Message {
  id: string;                    // Unique message ID
  type: 'message';
  role: 'assistant';
  content: ContentBlock[];       // Generated content blocks
  model: string;                 // Model that generated response
  stop_reason: StopReason | null;
  stop_sequence: string | null;
  usage: Usage;                  // Token usage information
  _request_id?: string;          // Request ID for debugging
}

type StopReason =
  | 'end_turn'        // Natural completion
  | 'max_tokens'      // Reached max_tokens limit
  | 'stop_sequence'   // Hit a stop sequence
  | 'tool_use'        // Wants to use a tool
  | 'server_tool_use' // Server-side tool execution
  ;

interface Usage {
  input_tokens: number;
  output_tokens: number;
  cache_creation_input_tokens?: number;  // Tokens cached (first use)
  cache_read_input_tokens?: number;      // Tokens read from cache
}

Content Blocks

Input Content Blocks

type ContentBlockParam =
  | TextBlockParam
  | ImageBlockParam
  | DocumentBlockParam
  | ToolUseBlockParam
  | ToolResultBlockParam
  | ThinkingBlockParam
  | RedactedThinkingBlockParam
  | ServerToolUseBlockParam
  | SearchResultBlockParam
  | WebSearchResultBlockParam
  ;

// Text content
interface TextBlockParam {
  type: 'text';
  text: string;
  cache_control?: CacheControlEphemeral;
  citations?: TextCitationParam[];
}

// Image content
interface ImageBlockParam {
  type: 'image';
  source: Base64ImageSource | URLImageSource;
  cache_control?: CacheControlEphemeral;
}

interface Base64ImageSource {
  type: 'base64';
  media_type: 'image/jpeg' | 'image/png' | 'image/gif' | 'image/webp';
  data: string;  // Base64-encoded image
}

interface URLImageSource {
  type: 'url';
  url: string;
}

// Document content (PDFs, text files)
interface DocumentBlockParam {
  type: 'document';
  source: Base64PDFSource | URLPDFSource | PlainTextSource;
  cache_control?: CacheControlEphemeral;
  citations?: CitationsConfigParam;  // Enable citation generation for this document
}

interface Base64PDFSource {
  type: 'base64';
  media_type: 'application/pdf';
  data: string;  // Base64-encoded PDF
}

interface URLPDFSource {
  type: 'url';
  url: string;
  media_type: 'application/pdf';
}

interface PlainTextSource {
  type: 'text';
  media_type: 'text/plain';
  data: string;
}

// Tool use request (from assistant)
interface ToolUseBlockParam {
  type: 'tool_use';
  id: string;
  name: string;
  input: Record<string, any>;
  cache_control?: CacheControlEphemeral;
}

// Tool result (from user)
interface ToolResultBlockParam {
  type: 'tool_result';
  tool_use_id: string;
  content?: string | ContentBlockParam[];
  is_error?: boolean;
  cache_control?: CacheControlEphemeral;
}

Output Content Blocks

type ContentBlock =
  | TextBlock
  | ThinkingBlock
  | RedactedThinkingBlock
  | ToolUseBlock
  | ServerToolUseBlock
  | WebSearchResultBlock
  ;

interface TextBlock {
  type: 'text';
  text: string;
  citations?: TextCitation[];
}

interface ThinkingBlock {
  type: 'thinking';
  thinking: string;
  signature: string;  // Cryptographic signature verifying the thinking content
}

interface RedactedThinkingBlock {
  type: 'redacted_thinking';
  data: string;  // Encrypted (redacted) thinking content
}

interface ToolUseBlock {
  type: 'tool_use';
  id: string;
  name: string;
  input: Record<string, any>;
}

interface ServerToolUseBlock {
  type: 'server_tool_use';
  id: string;
  name: string;
  input: Record<string, any>;
}

interface WebSearchResultBlock {
  type: 'web_search_result';
  url: string;
  title: string;
  encrypted_content: string;
  page_age: string | null;
}

Examples

Text with Images

const message = await client.messages.create({
  model: 'claude-sonnet-4-5-20250929',
  max_tokens: 1024,
  messages: [
    {
      role: 'user',
      content: [
        {
          type: 'image',
          source: {
            type: 'base64',
            media_type: 'image/jpeg',
            data: '/9j/4AAQSkZJRgABAQAA...',  // Base64 image data
          },
        },
        {
          type: 'text',
          text: 'What is in this image?',
        },
      ],
    },
  ],
});

From URL:

const message = await client.messages.create({
  model: 'claude-sonnet-4-5-20250929',
  max_tokens: 1024,
  messages: [
    {
      role: 'user',
      content: [
        {
          type: 'image',
          source: {
            type: 'url',
            url: 'https://example.com/image.jpg',
          },
        },
        {
          type: 'text',
          text: 'Describe this image.',
        },
      ],
    },
  ],
});

Documents (PDFs)

import fs from 'fs';

const pdfData = fs.readFileSync('document.pdf', 'base64');

const message = await client.messages.create({
  model: 'claude-sonnet-4-5-20250929',
  max_tokens: 1024,
  messages: [
    {
      role: 'user',
      content: [
        {
          type: 'document',
          source: {
            type: 'base64',
            media_type: 'application/pdf',
            data: pdfData,
          },
        },
        {
          type: 'text',
          text: 'Summarize this document.',
        },
      ],
    },
  ],
});

Temperature and Sampling

Control randomness and creativity:

// More creative (higher randomness)
const creative = await client.messages.create({
  model: 'claude-sonnet-4-5-20250929',
  max_tokens: 1024,
  temperature: 1.0,  // Range: 0-1
  messages: [
    {
      role: 'user',
      content: 'Write a creative story about a robot.',
    }
  ],
});

// More focused (lower randomness)
const focused = await client.messages.create({
  model: 'claude-sonnet-4-5-20250929',
  max_tokens: 1024,
  temperature: 0.2,
  top_p: 0.9,        // Nucleus sampling
  messages: [
    {
      role: 'user',
      content: 'What is 2+2?',
    }
  ],
});

Stop Sequences

Stop generation at specific strings:

const message = await client.messages.create({
  model: 'claude-sonnet-4-5-20250929',
  max_tokens: 1024,
  stop_sequences: ['\n\nHuman:', 'END'],
  messages: [
    {
      role: 'user',
      content: 'Count from 1 to 10, one per line.',
    }
  ],
});

// Will stop if it generates '\n\nHuman:' or 'END'
if (message.stop_reason === 'stop_sequence') {
  console.log('Stopped at:', message.stop_sequence);
}

Metadata

Attach custom metadata for tracking:

const message = await client.messages.create({
  model: 'claude-sonnet-4-5-20250929',
  max_tokens: 1024,
  metadata: {
    user_id: 'user_123',
    session_id: 'session_456',
    environment: 'production',
  },
  messages: [
    {
      role: 'user',
      content: 'Hello!',
    }
  ],
});

Prompt Caching

Cache frequently used content to reduce latency and costs:

// First request - creates cache
const message1 = await client.messages.create({
  model: 'claude-sonnet-4-5-20250929',
  max_tokens: 1024,
  system: [
    {
      type: 'text',
      text: 'You are an expert legal analyst.',
    },
    {
      type: 'text',
      text: largeDocumentText,  // Large document
      cache_control: {
        type: 'ephemeral',
        ttl: '5m',  // or '1h'
      },
    },
  ],
  messages: [
    {
      role: 'user',
      content: 'Analyze section 1.',
    }
  ],
});

console.log(message1.usage);
// {
//   input_tokens: 1000,
//   cache_creation_input_tokens: 5000,  // Cached
//   output_tokens: 500
// }

// Second request - uses cache
const message2 = await client.messages.create({
  model: 'claude-sonnet-4-5-20250929',
  max_tokens: 1024,
  system: [
    {
      type: 'text',
      text: 'You are an expert legal analyst.',
    },
    {
      type: 'text',
      text: largeDocumentText,  // Same content
      cache_control: { type: 'ephemeral', ttl: '5m' },
    },
  ],
  messages: [
    {
      role: 'user',
      content: 'Analyze section 2.',  // Different question
    }
  ],
});

console.log(message2.usage);
// {
//   input_tokens: 1000,
//   cache_read_input_tokens: 5000,  // Read from cache
//   output_tokens: 500
// }

Cache TTL options:

  • 5m: 5 minutes (default)
  • 1h: 1 hour

Caching works for:

  • System prompts
  • User messages
  • Tool definitions
  • Documents and images

Extended Thinking

Access Claude's reasoning process:

const message = await client.messages.create({
  model: 'claude-sonnet-4-5-20250929',
  max_tokens: 4096,
  thinking: {
    type: 'enabled',
    budget_tokens: 2000,  // Max tokens for thinking
  },
  messages: [
    {
      role: 'user',
      content: 'Solve this complex math problem: ...',
    }
  ],
});

// Response includes thinking blocks
for (const block of message.content) {
  if (block.type === 'thinking') {
    console.log('Reasoning:', block.thinking);
    console.log('Signature:', block.signature);
  } else if (block.type === 'text') {
    console.log('Answer:', block.text);
  }
}

Disable thinking:

const message = await client.messages.create({
  model: 'claude-sonnet-4-5-20250929',
  max_tokens: 1024,
  thinking: {
    type: 'disabled',
  },
  messages: [/* ... */],
});

Citations

Request citations for generated text:

const message = await client.messages.create({
  model: 'claude-sonnet-4-5-20250929',
  max_tokens: 1024,
  messages: [
    {
      role: 'user',
      content: [
        {
          type: 'document',
          source: {
            type: 'text',
            media_type: 'text/plain',
            data: 'The capital of France is Paris. It has a population of 2.1 million.',
          },
          citations: { enabled: true },  // Citations are enabled on the document block
        },
        {
          type: 'text',
          text: 'What is the population of the capital?',
        },
      ],
    }
  ],
});

// Response includes citations
const textBlock = message.content[0];
if (textBlock.type === 'text' && textBlock.citations) {
  for (const citation of textBlock.citations) {
    console.log('Cited text:', citation.cited_text);
    console.log('Location:', citation.location);
  }
}

Token Counting

Count tokens before creating a message:

client.messages.countTokens(params: MessageCountTokensParams): APIPromise<MessageTokensCount>;

interface MessageCountTokensParams {
  model: string;
  messages: MessageParam[];
  system?: string | SystemBlockParam[];
  tools?: MessageCountTokensTool[];
  tool_choice?: ToolChoice;
}

interface MessageTokensCount {
  input_tokens: number;
}

Example:

const tokenCount = await client.messages.countTokens({
  model: 'claude-sonnet-4-5-20250929',
  messages: [
    {
      role: 'user',
      content: 'What is the weather like today?',
    }
  ],
});

console.log('Input tokens:', tokenCount.input_tokens);

// Use count to validate before expensive operation
if (tokenCount.input_tokens < 1000) {
  const message = await client.messages.create({
    model: 'claude-sonnet-4-5-20250929',
    max_tokens: 1024,
    messages: [
      {
        role: 'user',
        content: 'What is the weather like today?',
      }
    ],
  });
}

Response Access

Standard Promise

const message = await client.messages.create({ /* ... */ });
console.log(message.content);
console.log(message.usage);

With Response Metadata

const { data, response, request_id } = await client.messages
  .create({ /* ... */ })
  .withResponse();

console.log('Message:', data);
console.log('Status:', response.status);
console.log('Request ID:', request_id);
console.log('Rate limit remaining:', response.headers.get('x-ratelimit-remaining'));

Raw Response

const response = await client.messages
  .create({ /* ... */ })
  .asResponse();

console.log('Status:', response.status);
console.log('Headers:', response.headers);

// Parse body manually if needed
const message = await response.json();

Streaming

For real-time responses, use streaming mode:

const stream = await client.messages.create({
  model: 'claude-sonnet-4-5-20250929',
  max_tokens: 1024,
  stream: true,  // Enable streaming
  messages: [
    {
      role: 'user',
      content: 'Write a haiku about programming.',
    }
  ],
});

for await (const event of stream) {
  if (event.type === 'content_block_delta') {
    if (event.delta.type === 'text_delta') {
      process.stdout.write(event.delta.text);
    }
  }
}

Or use the enhanced stream helper:

const stream = client.messages.stream({
  model: 'claude-sonnet-4-5-20250929',
  max_tokens: 1024,
  messages: [
    {
      role: 'user',
      content: 'Write a story.',
    }
  ],
});

stream.on('text', (text) => {
  console.log('Text delta:', text);
});

const message = await stream.finalMessage();
console.log('Complete message:', message);

See streaming.md for complete streaming documentation.

Tool Use

Enable Claude to call functions:

const message = await client.messages.create({
  model: 'claude-sonnet-4-5-20250929',
  max_tokens: 1024,
  tools: [
    {
      name: 'get_weather',
      description: 'Get the current weather in a location',
      input_schema: {
        type: 'object',
        properties: {
          location: {
            type: 'string',
            description: 'City and state, e.g., San Francisco, CA',
          },
          unit: {
            type: 'string',
            enum: ['celsius', 'fahrenheit'],
            description: 'Temperature unit',
          },
        },
        required: ['location'],
      },
    },
  ],
  messages: [
    {
      role: 'user',
      content: 'What is the weather in San Francisco?',
    }
  ],
});

// Check if Claude wants to use a tool
if (message.stop_reason === 'tool_use') {
  const toolUse = message.content.find(block => block.type === 'tool_use');
  console.log('Tool:', toolUse.name);
  console.log('Input:', toolUse.input);

  // Execute tool and continue conversation
  const toolResult = getWeather(toolUse.input.location);

  const followUp = await client.messages.create({
    model: 'claude-sonnet-4-5-20250929',
    max_tokens: 1024,
    tools: [/* same tools */],
    messages: [
      {
        role: 'user',
        content: 'What is the weather in San Francisco?',
      },
      {
        role: 'assistant',
        content: message.content,  // Include tool use
      },
      {
        role: 'user',
        content: [
          {
            type: 'tool_result',
            tool_use_id: toolUse.id,
            content: toolResult,
          },
        ],
      },
    ],
  });
}

See tools.md for comprehensive tool documentation including automatic execution with toolRunner().

Model Selection

Available models:

type Model =
  | 'claude-opus-4-5-20251101'
  | 'claude-sonnet-4-5-20250929'
  | 'claude-3-5-sonnet-20241022'
  | 'claude-3-5-haiku-20241022'
  | 'claude-3-opus-20240229'
  | 'claude-3-sonnet-20240229'
  | 'claude-3-haiku-20240307'
  // ... and more
  ;

Choose based on your needs:

  • Claude Opus 4.5: Most capable, complex reasoning
  • Claude Sonnet 4.5: Balance of intelligence and speed
  • Claude Haiku 3.5: Fast and efficient for simpler tasks

Error Handling

Handle API errors:

try {
  const message = await client.messages.create({
    model: 'claude-sonnet-4-5-20250929',
    max_tokens: 1024,
    messages: [
      {
        role: 'user',
        content: 'Hello!',
      }
    ],
  });
} catch (error) {
  if (error instanceof Anthropic.APIError) {
    console.error('Status:', error.status);
    console.error('Message:', error.message);
    console.error('Request ID:', error.requestID);

    if (error.status === 429) {
      // Rate limit - retry with backoff
    } else if (error.status === 529) {
      // Overloaded - wait and retry
    }
  }
  throw error;
}

See errors.md for complete error documentation.

Best Practices

Conversation Management

// ✅ Good: Maintain conversation state
const conversation: MessageParam[] = [
  { role: 'user', content: 'Hi, I am Alice.' },
];

const response1 = await client.messages.create({
  model: 'claude-sonnet-4-5-20250929',
  max_tokens: 1024,
  messages: conversation,
});

// Add assistant response to conversation
conversation.push({
  role: 'assistant',
  content: response1.content,
});

// Continue conversation
conversation.push({
  role: 'user',
  content: 'What is my name?',
});

const response2 = await client.messages.create({
  model: 'claude-sonnet-4-5-20250929',
  max_tokens: 1024,
  messages: conversation,
});

Token Management

// ✅ Good: Check token usage
const message = await client.messages.create({ /* ... */ });
console.log('Tokens used:', message.usage.input_tokens + message.usage.output_tokens);

// ✅ Good: Use countTokens for validation
const count = await client.messages.countTokens({ /* ... */ });
if (count.input_tokens > 100000) {
  console.warn('Input is very large, consider summarizing');
}

Prompt Caching

// ✅ Good: Cache expensive content
// Mark only the content you want to reuse
const baseSystem: Anthropic.TextBlockParam[] = [
  { type: 'text', text: 'You are a helpful assistant.' },
  {
    type: 'text',
    text: largeKnowledgeBase,
    cache_control: { type: 'ephemeral', ttl: '1h' },  // Cache for 1 hour
  },
];

// Reuse across multiple requests
for (const question of questions) {
  await client.messages.create({
    model: 'claude-sonnet-4-5-20250929',
    max_tokens: 1024,
    system: baseSystem,  // Reuses cached content
    messages: [{ role: 'user', content: question }],
  });
}

See Also