diff --git a/.tessl/missing-tiles.txt b/.tessl/missing-tiles.txt index 4b1b799..12009a4 100644 --- a/.tessl/missing-tiles.txt +++ b/.tessl/missing-tiles.txt @@ -1,6 +1,5 @@ # Notable dependencies without tessl tiles # PyPI -tessl/pypi-anthropic tessl/pypi-ruff tessl/pypi-isort tessl/pypi-junitparser @@ -8,14 +7,7 @@ tessl/pypi-pytest-asyncio tessl/pypi-respx tessl/pypi-cachetools tessl/pypi-vulture -tessl/pypi-testcontainers # test-optional (large ML packages) -tessl/pypi-pandas tessl/pypi-scipy -tessl/pypi-torch -tessl/pypi-tensorflow -tessl/pypi-jax tessl/pypi-xarray -tessl/pypi-pyarrow tessl/pypi-numba -tessl/pypi-pyrsistent diff --git a/.tessl/tiles/tessl/pypi-anthropic/docs/api/batches.md b/.tessl/tiles/tessl/pypi-anthropic/docs/api/batches.md new file mode 100644 index 0000000..7f1b4db --- /dev/null +++ b/.tessl/tiles/tessl/pypi-anthropic/docs/api/batches.md @@ -0,0 +1,199 @@ +# Batches API Reference + +Process multiple messages asynchronously in batches for high-throughput use cases with 50% cost reduction. + +## Create Batch + +```python { .api } +def create( + self, + *, + requests: list[MessageBatchIndividualRequest], + **kwargs +) -> MessageBatch: + """ + Create a batch of message requests. + + Parameters: + requests: List of individual message requests with custom_id + + Returns: + MessageBatch with id, processing_status, request counts + """ + ... + +async def create(...) -> MessageBatch: ... +``` + +## Retrieve Batch + +```python { .api } +def retrieve( + self, + message_batch_id: str, + **kwargs +) -> MessageBatch: + """Retrieve batch status and metadata.""" + ... + +async def retrieve(...) -> MessageBatch: ... +``` + +## List Batches + +```python { .api } +def list( + self, + *, + before_id: str = NOT_GIVEN, + after_id: str = NOT_GIVEN, + limit: int = NOT_GIVEN, + **kwargs +) -> SyncPage[MessageBatch]: + """List batches with pagination.""" + ... + +def list(...) -> AsyncPage[MessageBatch]: ... +``` + +## Cancel Batch + +```python { .api } +def cancel( + self, + message_batch_id: str, + **kwargs +) -> MessageBatch: + """Cancel a batch in progress.""" + ... + +async def cancel(...) -> MessageBatch: ... +``` + +## Delete Batch + +```python { .api } +def delete( + self, + message_batch_id: str, + **kwargs +) -> DeletedMessageBatch: + """Delete a batch.""" + ... + +async def delete(...) -> DeletedMessageBatch: ... +``` + +## Get Results + +```python { .api } +def results( + self, + message_batch_id: str, + **kwargs +) -> JSONLDecoder[MessageBatchIndividualResponse]: + """Stream batch results as JSONL.""" + ... + +def results(...) -> AsyncJSONLDecoder[MessageBatchIndividualResponse]: ... 
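+
+# Note: results are produced only after the batch has finished processing
+# (processing_status == "ended", at which point results_url is populated);
+# requesting them earlier is expected to fail.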
+``` + +## Response Types + +```python { .api } +class MessageBatch(BaseModel): + """Batch metadata and status.""" + id: str + type: Literal["message_batch"] + processing_status: Literal["in_progress", "canceling", "ended"] + request_counts: MessageBatchRequestCounts + ended_at: str | None + created_at: str + expires_at: str + cancel_initiated_at: str | None + results_url: str | None + +class MessageBatchRequestCounts(BaseModel): + """Request count statistics.""" + processing: int + succeeded: int + errored: int + canceled: int + expired: int +``` + +## Request Types + +```python { .api } +class MessageBatchIndividualRequest(TypedDict): + """Individual request in batch.""" + custom_id: str + params: MessageCreateParams +``` + +## Quick Examples + +### Create Basic Batch + +```python +batch = client.messages.batches.create( + requests=[ + { + "custom_id": "request-1", + "params": { + "model": "claude-sonnet-4-5-20250929", + "max_tokens": 1024, + "messages": [{"role": "user", "content": "What is AI?"}] + } + }, + { + "custom_id": "request-2", + "params": { + "model": "claude-sonnet-4-5-20250929", + "max_tokens": 1024, + "messages": [{"role": "user", "content": "What is ML?"}] + } + } + ] +) +print(f"Batch ID: {batch.id}") +``` + +### Check Status + +```python +batch = client.messages.batches.retrieve("batch_abc123") +print(f"Status: {batch.processing_status}") +print(f"Succeeded: {batch.request_counts.succeeded}") +print(f"Errored: {batch.request_counts.errored}") +``` + +### Poll Until Complete + +```python +import time + +while True: + batch = client.messages.batches.retrieve(batch_id) + if batch.processing_status == "ended": + break + time.sleep(60) +``` + +### Get Results + +```python +results = client.messages.batches.results("batch_abc123") + +for response in results: + if response.result.type == "succeeded": + print(f"{response.custom_id}: {response.result.message.content[0].text}") + elif response.result.type == "errored": + print(f"{response.custom_id}: Error - {response.result.error.message}") +``` + +## See Also + +- [Messages API](./messages.md) - Core message creation +- [Batch Processing Guide](../guides/batch-processing.md) - Advanced batch patterns +- [Type System](../reference/types.md) - Complete type definitions diff --git a/.tessl/tiles/tessl/pypi-anthropic/docs/api/completions.md b/.tessl/tiles/tessl/pypi-anthropic/docs/api/completions.md new file mode 100644 index 0000000..6071805 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-anthropic/docs/api/completions.md @@ -0,0 +1,285 @@ +# Completions API Reference (Legacy) + +The Text Completions API is the legacy interface for text generation with Claude. For new applications, use the [Messages API](./messages.md) instead, which provides better conversation handling and additional features. + +## Overview + +The Completions API generates text based on a prompt string. It uses special prompt markers (`HUMAN_PROMPT` and `AI_PROMPT`) to structure conversations. + +**Deprecation Notice**: This API is maintained for backward compatibility. New applications should use the Messages API for improved functionality and support. + +## Create Completion + +```python { .api } +def create( + self, + *, + model: str, + prompt: str, + max_tokens_to_sample: int, + stop_sequences: list[str] = NOT_GIVEN, + temperature: float = NOT_GIVEN, + top_p: float = NOT_GIVEN, + top_k: int = NOT_GIVEN, + metadata: dict[str, Any] = NOT_GIVEN, + stream: bool = False, +) -> Completion: + """ + Create a text completion. + + Parameters: + model: Model identifier. 
Examples: "claude-2.1", "claude-instant-1.2" + prompt: Formatted prompt string with HUMAN_PROMPT and AI_PROMPT markers + max_tokens_to_sample: Maximum tokens to generate (required) + stop_sequences: List of sequences that stop generation when encountered + temperature: Sampling temperature 0.0-1.0 (default: 1.0) + top_p: Nucleus sampling parameter 0.0-1.0 + top_k: Top-k sampling parameter + metadata: Request metadata for tracking + stream: Enable streaming responses + + Returns: + Completion: Response containing generated text and stop reason + + Raises: + BadRequestError: Invalid request parameters + AuthenticationError: Invalid or missing API key + RateLimitError: Rate limit exceeded + """ + ... + +async def create(...) -> Completion: + """ + Async version of create. + + Same parameters and returns as synchronous create(), but executes asynchronously. + """ + ... +``` + +## Stream Completion + +```python { .api } +def stream( + self, + *, + model: str, + prompt: str, + max_tokens_to_sample: int, + **kwargs +) -> Iterator[Completion]: + """ + Stream a completion response. + + Yields partial completion objects as text is generated. + + Parameters: + Same as create() method + + Yields: + Completion objects with incremental text + + Example: + for chunk in client.completions.stream( + model="claude-2.1", + prompt=f"{HUMAN_PROMPT} Hello{AI_PROMPT}", + max_tokens_to_sample=100 + ): + print(chunk.completion, end="", flush=True) + """ + ... + +async def stream(...) -> AsyncIterator[Completion]: + """Async version of stream().""" + ... +``` + +## Response Type + +```python { .api } +class Completion(BaseModel): + """ + Text completion response. + + Attributes: + id: Unique completion identifier + type: Always "completion" + completion: Generated text content + stop_reason: Why generation stopped ("stop_sequence", "max_tokens", or "end_turn") + model: Model used for generation + """ + id: str + type: Literal["completion"] + completion: str + stop_reason: str | None + model: str +``` + +## Usage Examples + +### Basic Completion + +```python +from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT + +client = Anthropic() + +completion = client.completions.create( + model="claude-2.1", + prompt=f"{HUMAN_PROMPT} What is the capital of France?{AI_PROMPT}", + max_tokens_to_sample=100 +) + +print(completion.completion) +``` + +### Multi-turn Conversation + +```python +# Build conversation with prompt markers +conversation = f"""{HUMAN_PROMPT} Hello, my name is Alice.{AI_PROMPT} Hi Alice! Nice to meet you.{HUMAN_PROMPT} What's my name?{AI_PROMPT}""" + +completion = client.completions.create( + model="claude-2.1", + prompt=conversation, + max_tokens_to_sample=50 +) + +print(completion.completion) +# Output: "Your name is Alice." 
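+
+# Note: the prompt should end with AI_PROMPT so the model continues as the assistant.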
+``` + +### Streaming Completion + +```python +prompt = f"{HUMAN_PROMPT} Write a short story about a robot.{AI_PROMPT}" + +for chunk in client.completions.stream( + model="claude-2.1", + prompt=prompt, + max_tokens_to_sample=500 +): + print(chunk.completion, end="", flush=True) +``` + +### With Stop Sequences + +```python +completion = client.completions.create( + model="claude-2.1", + prompt=f"{HUMAN_PROMPT} List three colors:{AI_PROMPT}", + max_tokens_to_sample=100, + stop_sequences=["\n\n", "4."] # Stop after listing 3 items +) +``` + +### Temperature Control + +```python +# More deterministic (lower temperature) +completion = client.completions.create( + model="claude-2.1", + prompt=f"{HUMAN_PROMPT} What is 2+2?{AI_PROMPT}", + max_tokens_to_sample=10, + temperature=0.0 +) + +# More creative (higher temperature) +creative = client.completions.create( + model="claude-2.1", + prompt=f"{HUMAN_PROMPT} Write a creative story opening.{AI_PROMPT}", + max_tokens_to_sample=200, + temperature=1.0 +) +``` + +### Async Completion + +```python +import asyncio + +async def generate(): + client = AsyncAnthropic() + + completion = await client.completions.create( + model="claude-2.1", + prompt=f"{HUMAN_PROMPT} Hello{AI_PROMPT}", + max_tokens_to_sample=50 + ) + + return completion.completion + +result = asyncio.run(generate()) +``` + +### Async Streaming + +```python +async def stream_completion(): + client = AsyncAnthropic() + + async for chunk in client.completions.stream( + model="claude-2.1", + prompt=f"{HUMAN_PROMPT} Tell me a joke{AI_PROMPT}", + max_tokens_to_sample=200 + ): + print(chunk.completion, end="", flush=True) + +asyncio.run(stream_completion()) +``` + +## Migration to Messages API + +The Messages API provides better conversation handling and additional features. Here's how to migrate: + +### Completions API (Legacy) + +```python +from anthropic import HUMAN_PROMPT, AI_PROMPT + +completion = client.completions.create( + model="claude-2.1", + prompt=f"{HUMAN_PROMPT} Hello{AI_PROMPT}", + max_tokens_to_sample=100 +) +print(completion.completion) +``` + +### Messages API (Recommended) + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=100, + messages=[{"role": "user", "content": "Hello"}] +) +print(message.content[0].text) +``` + +### Benefits of Messages API + +- Better conversation structure (no manual prompt markers) +- Support for system prompts +- Multimodal input (images, documents) +- Tool/function calling +- Better type safety +- Streaming helpers +- Token counting utilities + +## Constants + +```python { .api } +from anthropic import HUMAN_PROMPT, AI_PROMPT + +HUMAN_PROMPT: str = "\n\nHuman:" # Marker for user messages +AI_PROMPT: str = "\n\nAssistant:" # Marker for assistant responses +``` + +These constants are used to structure prompts in the Completions API format. 
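+
+As a quick illustration of the raw prompt string these markers produce (a minimal sketch that simply applies the constant values above):
+
+```python
+from anthropic import HUMAN_PROMPT, AI_PROMPT
+
+# The markers are plain strings, so an f-string assembles the legacy prompt format.
+prompt = f"{HUMAN_PROMPT} What is the capital of France?{AI_PROMPT}"
+
+# Equivalent literal: "\n\nHuman: What is the capital of France?\n\nAssistant:"
+assert prompt.startswith("\n\nHuman:") and prompt.endswith("\n\nAssistant:")
+```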
+ +## See Also + +- [Messages API](./messages.md) - Modern conversation API (recommended) +- [Streaming API](./streaming.md) - Streaming patterns and helpers +- [Client Configuration](../reference/client-config.md) - Client setup +- [Package Constants](../index.md#package-constants) - All SDK constants diff --git a/.tessl/tiles/tessl/pypi-anthropic/docs/api/messages.md b/.tessl/tiles/tessl/pypi-anthropic/docs/api/messages.md new file mode 100644 index 0000000..c17da99 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-anthropic/docs/api/messages.md @@ -0,0 +1,716 @@ +# Messages API Reference + +The Messages API is the primary interface for conversational interactions with Claude. + +## Create Message + +```python { .api } +def create( + self, + *, + model: str, + messages: list[MessageParam], + max_tokens: int, + system: str | list[TextBlockParam] = NOT_GIVEN, + metadata: MetadataParam = NOT_GIVEN, + stop_sequences: list[str] = NOT_GIVEN, + stream: bool = False, + temperature: float = NOT_GIVEN, + top_p: float = NOT_GIVEN, + top_k: int = NOT_GIVEN, + tools: list[ToolParam] = NOT_GIVEN, + tool_choice: ToolChoice = NOT_GIVEN, + service_tier: Literal["auto", "standard_only"] = NOT_GIVEN, + thinking: ThinkingConfigParam = NOT_GIVEN, +) -> Message: + """ + Create a message with Claude. + + Parameters: + model: Model identifier (required). Examples: "claude-sonnet-4-5-20250929", + "claude-opus-4-5-20250929" + messages: List of conversation messages alternating between "user" and "assistant" + roles (required). Must start with user message. + max_tokens: Maximum number of tokens to generate (required). Must be positive integer. + system: System prompt to guide Claude's behavior. Can be string or list of text blocks + with cache control. Use for setting persona, instructions, or context. + metadata: Request metadata containing user_id for tracking and abuse prevention. + stop_sequences: List of strings that will stop generation when encountered. + Up to 4 sequences allowed. + stream: Enable streaming responses. If true, use stream() method instead for + better streaming helpers. + temperature: Sampling temperature 0.0-1.0 (default: 1.0). Lower values = more + deterministic, higher values = more creative. + top_p: Nucleus sampling parameter 0.0-1.0. Cumulative probability threshold + for token selection. + top_k: Top-k sampling parameter. Only sample from top K most likely tokens. + tools: List of tools Claude can use. Enables function calling capabilities. + tool_choice: Control how Claude selects tools. Options: "auto" (default), "any" + (force tool use), "none" (disable tools), or specific tool name. + service_tier: Service tier selection. "auto" (default) or "standard_only" for + guaranteed standard capacity. + thinking: Extended thinking configuration (beta). Enables enhanced reasoning with + budget control. + + Returns: + Message: Complete message response from Claude containing: + - id: Unique message identifier + - content: List of text and/or tool_use blocks + - role: Always "assistant" + - stop_reason: Why generation stopped + - usage: Token usage statistics + - model: Model that generated response + + Raises: + BadRequestError: Invalid request parameters (e.g., empty messages, invalid model) + AuthenticationError: Invalid or missing API key + PermissionDeniedError: API key lacks required permissions + RateLimitError: Rate limit exceeded. Check retry-after header. + RequestTooLargeError: Request payload exceeds size limits + InternalServerError: Anthropic server error. Safe to retry. 
+ OverloadedError: Service temporarily overloaded. Retry with backoff. + """ + ... + +async def create(...) -> Message: + """ + Async version of create. + + Same parameters and returns as synchronous create(), but executes asynchronously. + """ + ... +``` + +## Stream Message + +```python { .api } +def stream( + self, + *, + model: str, + messages: list[MessageParam], + max_tokens: int, + **kwargs +) -> MessageStreamManager: + """ + Stream a message response with helper utilities. + + Provides a context manager that handles streaming with convenient helpers + for text accumulation, final message access, and event handling. + + Parameters: + model: Model identifier (required) + messages: List of conversation messages (required) + max_tokens: Maximum tokens to generate (required) + **kwargs: All other parameters from create() method supported + + Returns: + MessageStreamManager: Context manager that yields MessageStreamEvent objects. + Provides helpers: + - .text_stream: Iterator of text deltas only + - .get_final_message(): Get complete Message after stream ends + - .get_final_text(): Get accumulated text after stream ends + - .current_message_snapshot: Current state during streaming + + Raises: + Same exceptions as create() method + + Example: + with client.messages.stream(...) as stream: + for text in stream.text_stream: + print(text, end="", flush=True) + """ + ... + +def stream(...) -> AsyncMessageStreamManager: + """ + Async version of stream. + + Same parameters and returns as synchronous stream(), but executes asynchronously. + Use with `async with` syntax. + """ + ... +``` + +## Count Tokens + +```python { .api } +def count_tokens( + self, + *, + model: str, + messages: list[MessageParam], + system: str | list[TextBlockParam] = NOT_GIVEN, + tools: list[ToolParam] = NOT_GIVEN, + tool_choice: ToolChoice = NOT_GIVEN, + thinking: ThinkingConfigParam = NOT_GIVEN, +) -> MessageTokensCount: + """ + Count input tokens without creating a message. + + Useful for estimating costs before making API calls or checking if + input fits within model's context window. + + Parameters: + model: Model identifier (required). Token counts vary by model. + messages: List of conversation messages to count (required) + system: System prompt to include in count + tools: Tools to include in count + tool_choice: Tool choice configuration to include in count + thinking: Thinking configuration to include in count + + Returns: + MessageTokensCount: Object containing: + - input_tokens: Total number of input tokens + + Raises: + BadRequestError: Invalid request parameters + AuthenticationError: Invalid or missing API key + RateLimitError: Rate limit exceeded + + Note: + Token count is exact for billing purposes. Includes all overhead + from system prompt, tools, and message formatting. + """ + ... + +async def count_tokens(...) -> MessageTokensCount: + """ + Async version of count_tokens. + + Same parameters and returns as synchronous count_tokens(), but executes asynchronously. + """ + ... +``` + +## Response Types + +### Message + +```python { .api } +class Message(BaseModel): + """ + Complete message response from Claude. 
+ + Attributes: + id: Unique message identifier (e.g., "msg_01XYZ...") + type: Always "message" + role: Always "assistant" for Claude's responses + content: List of content blocks (text, tool_use) in response + model: Model identifier used for generation + stop_reason: Reason generation stopped: + - "end_turn": Natural completion point + - "max_tokens": Hit max_tokens limit + - "stop_sequence": Hit custom stop sequence + - "tool_use": Model wants to use a tool + stop_sequence: Specific stop sequence that triggered completion (if applicable) + usage: Token usage statistics for billing and tracking + """ + id: str + type: Literal["message"] + role: Literal["assistant"] + content: list[ContentBlock] # TextBlock | ToolUseBlock + model: str + stop_reason: StopReason | None + stop_sequence: str | None + usage: Usage +``` + +### ContentBlock + +```python { .api } +ContentBlock = Union[TextBlock, ToolUseBlock] + +class TextBlock(BaseModel): + """ + Text content block in response. + + Attributes: + type: Always "text" + text: The text content generated by Claude + """ + type: Literal["text"] + text: str + +class ToolUseBlock(BaseModel): + """ + Tool invocation request from Claude. + + Attributes: + type: Always "tool_use" + id: Unique identifier for this tool call (use in tool_result) + name: Name of tool Claude wants to invoke + input: Tool input parameters as dict matching tool's input_schema + """ + type: Literal["tool_use"] + id: str + name: str + input: dict[str, Any] +``` + +### Usage + +```python { .api } +class Usage(BaseModel): + """ + Token usage statistics for billing and optimization. + + Attributes: + input_tokens: Number of tokens in the input (prompt, system, tools) + output_tokens: Number of tokens generated in response + cache_creation_input_tokens: Tokens used to create new cache entries + (charged at cache creation rate) + cache_read_input_tokens: Tokens read from cache (charged at reduced rate) + """ + input_tokens: int + output_tokens: int + cache_creation_input_tokens: int | None + cache_read_input_tokens: int | None +``` + +### Stop Reasons + +```python { .api } +StopReason = Literal[ + "end_turn", # Natural completion - Claude finished responding + "max_tokens", # Hit max_tokens limit - response may be incomplete + "stop_sequence", # Hit custom stop sequence - check stop_sequence field + "tool_use", # Model wants to use a tool - check content for ToolUseBlock +] +``` + +## Request Types + +### MessageParam + +```python { .api } +class MessageParam(TypedDict): + """ + User or assistant message in conversation. + + Fields: + role: Message sender - "user" for input, "assistant" for Claude's responses + content: Message content - string for simple text, or list of content blocks + for multimodal (text, images, documents) or tool-related content + """ + role: Literal["user", "assistant"] + content: str | list[ContentBlockParam] +``` + +### ContentBlockParam + +```python { .api } +ContentBlockParam = Union[ + TextBlockParam, + ImageBlockParam, + DocumentBlockParam, + ToolUseBlockParam, + ToolResultBlockParam, +] + +class TextBlockParam(TypedDict): + """ + Text content in user or assistant message. + + Fields: + type: Always "text" + text: The text content + cache_control: Optional prompt caching configuration. Use {"type": "ephemeral"} + to cache this content block for subsequent requests + """ + type: Literal["text"] + text: str + cache_control: NotRequired[CacheControlEphemeral] + +class ImageBlockParam(TypedDict): + """ + Image content in user message for vision capabilities. 
+ + Fields: + type: Always "image" + source: Image source - base64-encoded data or URL + cache_control: Optional prompt caching for image + """ + type: Literal["image"] + source: Base64ImageSource | URLImageSource + cache_control: NotRequired[CacheControlEphemeral] + +class DocumentBlockParam(TypedDict): + """ + Document content (PDF or text) in user message. + + Fields: + type: Always "document" + source: Document source - base64-encoded PDF/text or URL + cache_control: Optional prompt caching for document + """ + type: Literal["document"] + source: Base64PDFSource | URLPDFSource | PlainTextSource + cache_control: NotRequired[CacheControlEphemeral] + +class ToolUseBlockParam(TypedDict): + """ + Tool invocation in assistant message (when echoing Claude's tool request). + + Fields: + type: Always "tool_use" + id: Tool call identifier from Claude's response + name: Tool name that was invoked + input: Tool input parameters as dict + cache_control: Optional prompt caching + """ + type: Literal["tool_use"] + id: str + name: str + input: dict[str, Any] + cache_control: NotRequired[CacheControlEphemeral] + +class ToolResultBlockParam(TypedDict): + """ + Tool execution result in user message. + + Fields: + type: Always "tool_result" + tool_use_id: ID from Claude's ToolUseBlock that this result corresponds to + content: Tool result - string for simple output, or list of text/image blocks + for structured results + is_error: Set to True if tool execution failed, False otherwise (default: False) + cache_control: Optional prompt caching + """ + type: Literal["tool_result"] + tool_use_id: str + content: NotRequired[str | list[TextBlockParam | ImageBlockParam]] + is_error: NotRequired[bool] + cache_control: NotRequired[CacheControlEphemeral] +``` + +### Source Types + +```python { .api } +class Base64ImageSource(TypedDict): + """ + Base64-encoded image source for vision input. + + Fields: + type: Always "base64" + media_type: Image MIME type - jpeg, png, gif, or webp + data: Base64-encoded image bytes (without data URL prefix) + """ + type: Literal["base64"] + media_type: Literal["image/jpeg", "image/png", "image/gif", "image/webp"] + data: str + +class URLImageSource(TypedDict): + """ + Image from URL for vision input. + + Fields: + type: Always "url" + url: Publicly accessible image URL (must be accessible to Anthropic servers) + """ + type: Literal["url"] + url: str + +class Base64PDFSource(TypedDict): + """ + Base64-encoded PDF document source. + + Fields: + type: Always "base64" + media_type: Always "application/pdf" + data: Base64-encoded PDF bytes (without data URL prefix) + """ + type: Literal["base64"] + media_type: Literal["application/pdf"] + data: str + +class URLPDFSource(TypedDict): + """ + PDF from URL for document processing. + + Fields: + type: Always "url" + media_type: Always "application/pdf" + url: Publicly accessible PDF URL + """ + type: Literal["url"] + media_type: Literal["application/pdf"] + url: str + +class PlainTextSource(TypedDict): + """ + Plain text document source. + + Fields: + type: Always "text" + media_type: Always "text/plain" + data: Plain text content + """ + type: Literal["text"] + media_type: Literal["text/plain"] + data: str +``` + +### Configuration Types + +```python { .api } +class MetadataParam(TypedDict, total=False): + """ + Request metadata for tracking and compliance. + + Fields: + user_id: End-user identifier for tracking, rate limiting, and abuse prevention. + Should be unique per end-user, not per API key. 
+ """ + user_id: str + +class CacheControlEphemeral(TypedDict): + """ + Prompt caching configuration for reducing costs on repeated content. + + Fields: + type: Always "ephemeral" - content cached temporarily (~5 minutes) + """ + type: Literal["ephemeral"] + +class MessageTokensCount(BaseModel): + """ + Token count response from count_tokens(). + + Attributes: + input_tokens: Exact number of input tokens for billing estimation + """ + input_tokens: int +``` + +## Quick Examples + +### Basic Text + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[ + {"role": "user", "content": "Hello"} + ] +) +print(message.content[0].text) +``` + +### System Prompt + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + system="You are a helpful assistant.", + messages=[ + {"role": "user", "content": "Hello"} + ] +) +``` + +### Multi-Turn Conversation + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[ + {"role": "user", "content": "My name is Alice."}, + {"role": "assistant", "content": "Hello Alice! Nice to meet you."}, + {"role": "user", "content": "What's my name?"} + ] +) +``` + +### Image Analysis + +```python +import base64 + +with open("image.jpg", "rb") as f: + image_data = base64.standard_b64encode(f.read()).decode() + +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{ + "role": "user", + "content": [ + { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/jpeg", + "data": image_data + } + }, + {"type": "text", "text": "What's in this image?"} + ] + }] +) +``` + +### Document Processing + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{ + "role": "user", + "content": [ + { + "type": "document", + "source": { + "type": "base64", + "media_type": "application/pdf", + "data": pdf_base64_data + } + }, + {"type": "text", "text": "Summarize this document."} + ] + }] +) +``` + +### Temperature Control + +```python +# Deterministic (temperature = 0) +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + temperature=0.0, + messages=[{"role": "user", "content": "What is 2+2?"}] +) + +# Creative (temperature = 1) +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + temperature=1.0, + messages=[{"role": "user", "content": "Write a creative story."}] +) +``` + +### Prompt Caching + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + system=[ + { + "type": "text", + "text": "You are an expert on Shakespeare.", + "cache_control": {"type": "ephemeral"} + } + ], + messages=[ + {"role": "user", "content": "Tell me about Hamlet."} + ] +) + +# Check cache usage +print(f"Cache creation: {message.usage.cache_creation_input_tokens}") +print(f"Cache read: {message.usage.cache_read_input_tokens}") +``` + +### Token Counting + +```python +token_count = client.messages.count_tokens( + model="claude-sonnet-4-5-20250929", + messages=[ + {"role": "user", "content": "What is the capital of France?"} + ] +) +print(f"Input tokens: {token_count.input_tokens}") +``` + +### Metadata Tracking + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + metadata={"user_id": "user_12345"}, + messages=[{"role": "user", "content": 
"Hello"}] +) +``` + +## Raw Response Access + +Access raw HTTP responses for headers, status codes, and debugging. + +```python { .api } +# Access raw response wrapper +client.messages.with_raw_response.create(...) # Returns APIResponse[Message] +client.messages.with_streaming_response.stream(...) # Returns APIResponse with streaming + +# Async versions +async_client.messages.with_raw_response.create(...) +async_client.messages.with_streaming_response.stream(...) +``` + +### Raw Response Example + +```python +# Get raw HTTP response +response = client.messages.with_raw_response.create( + model="claude-sonnet-4-5-20250929", + max_tokens=100, + messages=[{"role": "user", "content": "Hello"}] +) + +# Access HTTP response details +print(f"Status: {response.http_response.status_code}") +print(f"Headers: {response.http_response.headers}") +print(f"Request ID: {response.http_response.headers.get('request-id')}") + +# Access parsed message object +message = response.parse() +print(message.content[0].text) +``` + +### Streaming Raw Response + +```python +# Get raw streaming response +with client.messages.with_streaming_response.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=100, + messages=[{"role": "user", "content": "Hello"}] +) as response: + # Access response object before streaming + print(f"Status: {response.http_response.status_code}") + + # Stream normally + for text in response.text_stream: + print(text, end="", flush=True) +``` + +### Check Rate Limit Headers + +```python +response = client.messages.with_raw_response.create( + model="claude-sonnet-4-5-20250929", + max_tokens=100, + messages=[{"role": "user", "content": "Hello"}] +) + +headers = response.http_response.headers +print(f"Rate limit: {headers.get('anthropic-ratelimit-requests-limit')}") +print(f"Remaining: {headers.get('anthropic-ratelimit-requests-remaining')}") +print(f"Reset: {headers.get('anthropic-ratelimit-requests-reset')}") +``` + +## See Also + +- [Streaming API](./streaming.md) - Real-time response streaming +- [Tool Use API](./tools.md) - Function calling capabilities +- [Multimodal Guide](../guides/multimodal.md) - Images and documents +- [Type System](../reference/types.md) - Complete type definitions diff --git a/.tessl/tiles/tessl/pypi-anthropic/docs/api/models.md b/.tessl/tiles/tessl/pypi-anthropic/docs/api/models.md new file mode 100644 index 0000000..c3224a3 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-anthropic/docs/api/models.md @@ -0,0 +1,161 @@ +# Models API Reference + +Retrieve information about available Claude models and their capabilities. + +## Retrieve Model + +```python { .api } +def retrieve( + self, + model_id: str, + **kwargs +) -> ModelInfo: + """ + Retrieve model information. + + Parameters: + model_id: Model identifier (e.g., "claude-sonnet-4-5-20250929") + + Returns: + ModelInfo with model metadata and capabilities + """ + ... + +async def retrieve(...) -> ModelInfo: ... +``` + +## List Models + +```python { .api } +def list( + self, + *, + before_id: str = NOT_GIVEN, + after_id: str = NOT_GIVEN, + limit: int = NOT_GIVEN, + **kwargs +) -> SyncPage[ModelInfo]: + """ + List available models with pagination. + + Returns: + SyncPage of ModelInfo objects with auto-pagination + """ + ... + +def list(...) -> AsyncPage[ModelInfo]: ... 
+``` + +## Response Type + +```python { .api } +class ModelInfo(BaseModel): + """Model information and capabilities.""" + id: str + type: Literal["model"] + display_name: str + created_at: str +``` + +## Available Models + +### Claude 4.5 (Latest) + +```python { .api } +"claude-opus-4-5-20250929" # Most capable model +"claude-sonnet-4-5-20250929" # Balanced intelligence and speed +``` + +### Claude 3.5 + +```python { .api } +"claude-3-5-sonnet-20241022" # Previous Sonnet version +"claude-3-5-sonnet-20240620" # Earlier Sonnet version +"claude-3-5-haiku-20241022" # Fast, cost-effective +``` + +### Claude 3 + +```python { .api } +"claude-3-opus-20240229" # Powerful, intelligent +"claude-3-sonnet-20240229" # Balanced +"claude-3-haiku-20240307" # Fast and efficient +``` + +### Legacy (Claude 2) + +```python { .api } +"claude-2.1" # Legacy Claude 2.1 +"claude-2.0" # Legacy Claude 2.0 +"claude-instant-1.2" # Legacy instant model +``` + +## Quick Examples + +### Retrieve Specific Model + +```python +model = client.models.retrieve("claude-sonnet-4-5-20250929") +print(f"Model: {model.display_name}") +print(f"Created: {model.created_at}") +``` + +### List All Models + +```python +for model in client.models.list(): + print(f"{model.id}: {model.display_name}") +``` + +### Check Model Availability + +```python +from anthropic import NotFoundError + +def model_exists(model_id: str) -> bool: + try: + client.models.retrieve(model_id) + return True + except NotFoundError: + return False + +if model_exists("claude-sonnet-4-5-20250929"): + print("Model is available") +``` + +### Filter Models by Family + +```python +def get_models_by_family(family: str) -> list[ModelInfo]: + """Get all models in a family (e.g., 'sonnet', 'opus', 'haiku').""" + return [m for m in client.models.list() if family.lower() in m.id.lower()] + +sonnet_models = get_models_by_family("sonnet") +for model in sonnet_models: + print(model.display_name) +``` + +### Model Selection Helper + +```python +def select_model(capability: str = "balanced") -> str: + """ + Select appropriate model based on requirements. + + Args: + capability: "maximum" (opus), "balanced" (sonnet), "fast" (haiku) + """ + if capability == "maximum": + return "claude-opus-4-5-20250929" + elif capability == "fast": + return "claude-3-5-haiku-20241022" + else: # balanced + return "claude-sonnet-4-5-20250929" + +model_id = select_model("balanced") +``` + +## See Also + +- [Messages API](./messages.md) - Core message creation +- [Type System](../reference/types.md) - Complete type definitions diff --git a/.tessl/tiles/tessl/pypi-anthropic/docs/api/streaming.md b/.tessl/tiles/tessl/pypi-anthropic/docs/api/streaming.md new file mode 100644 index 0000000..76fda7b --- /dev/null +++ b/.tessl/tiles/tessl/pypi-anthropic/docs/api/streaming.md @@ -0,0 +1,838 @@ +# Streaming API Reference + +Stream message responses with rich event handling and helper utilities for incremental processing. + +## Stream Message + +```python { .api } +def stream( + self, + *, + model: str, + messages: list[MessageParam], + max_tokens: int, + system: str | list[TextBlockParam] = NOT_GIVEN, + temperature: float = NOT_GIVEN, + tools: list[ToolParam] = NOT_GIVEN, + **kwargs +) -> MessageStreamManager: + """ + Stream a message response with helper utilities. + + Returns a context manager that provides streaming with convenient helpers + for text accumulation, event handling, and final message access. 
+ + Parameters: + model: Model identifier (required) + messages: List of conversation messages (required) + max_tokens: Maximum tokens to generate (required) + system: System prompt (string or list with cache control) + temperature: Sampling temperature 0.0-1.0 + tools: Available tools for function calling + **kwargs: All other parameters from messages.create() are supported + + Returns: + MessageStreamManager: Context manager that provides: + - MessageStream for iteration + - Helper methods for text and final message extraction + + Raises: + Same exceptions as messages.create() + + Example: + with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "Hello"}] + ) as stream: + for text in stream.text_stream: + print(text, end="", flush=True) + """ + ... + +async def stream( + self, + *, + model: str, + messages: list[MessageParam], + max_tokens: int, + **kwargs +) -> AsyncMessageStreamManager: + """ + Async version of stream. + + Same parameters and behavior as synchronous stream(), but executes + asynchronously. Use with `async with` syntax. + + Returns: + AsyncMessageStreamManager: Async context manager for streaming + """ + ... +``` + +## MessageStream + +```python { .api } +class MessageStream: + """ + Synchronous message stream with rich event handling and helper methods. + + Provides convenient access to streaming events, text deltas, and final message + accumulation. Use via MessageStreamManager context manager. + """ + + def __iter__(self) -> Iterator[MessageStreamEvent]: + """ + Iterate over all stream events. + + Yields: + MessageStreamEvent: Events including message_start, content_block_start, + content_block_delta, content_block_stop, message_delta, message_stop + """ + ... + + def __enter__(self) -> MessageStream: + """Context manager entry. Returns self.""" + ... + + def __exit__(self, exc_type, exc_val, exc_tb) -> None: + """Context manager exit. Closes stream and cleans up resources.""" + ... + + def get_final_message(self) -> Message: + """ + Get final accumulated message after stream completes. + + Must be called after iterating through the stream. Returns complete Message + object with all content blocks, usage statistics, and stop reason. + + Returns: + Message: Complete message with all accumulated content + + Raises: + RuntimeError: If called before stream completes + """ + ... + + def get_final_text(self) -> str: + """ + Get final accumulated text from all text blocks. + + Concatenates text from all TextBlock content blocks in the message. + Useful for simple text-only responses. + + Returns: + str: Concatenated text from all text blocks + + Raises: + RuntimeError: If called before stream completes + """ + ... + + @property + def text_stream(self) -> Iterator[str]: + """ + Iterate over text deltas only, filtering out other events. + + Convenient property for streaming text to console or UI. Automatically + extracts text from content_block_delta events. + + Yields: + str: Text delta strings as they arrive + + Example: + for text in stream.text_stream: + print(text, end="", flush=True) + """ + ... + + @property + def current_message_snapshot(self) -> Message: + """ + Get current accumulated message snapshot during streaming. + + Provides partial Message object with content accumulated so far. Useful + for showing partial results or implementing custom UI updates. + + Returns: + Message: Partial message with current content (usage may be incomplete) + """ + ... 
+ +class AsyncMessageStream: + """ + Asynchronous message stream with rich event handling and helper methods. + + Async version of MessageStream. All methods are async and use async iteration. + """ + + def __aiter__(self) -> AsyncIterator[MessageStreamEvent]: + """ + Async iterate over all stream events. + + Yields: + MessageStreamEvent: Events as they arrive from the API + """ + ... + + async def __aenter__(self) -> AsyncMessageStream: + """Async context manager entry. Returns self.""" + ... + + async def __aexit__(self, exc_type, exc_val, exc_tb) -> None: + """Async context manager exit. Closes stream and cleans up resources.""" + ... + + async def get_final_message(self) -> Message: + """ + Get final accumulated message after stream completes. + + Returns: + Message: Complete message with all accumulated content + """ + ... + + async def get_final_text(self) -> str: + """ + Get final accumulated text from all text blocks. + + Returns: + str: Concatenated text from all text blocks + """ + ... + + @property + def text_stream(self) -> AsyncIterator[str]: + """ + Async iterate over text deltas only. + + Yields: + str: Text delta strings as they arrive + """ + ... + + @property + def current_message_snapshot(self) -> Message: + """ + Get current accumulated message snapshot during streaming. + + Returns: + Message: Partial message with current content + """ + ... +``` + +## MessageStreamManager + +Context manager for streaming messages with helper methods. + +```python { .api } +class MessageStreamManager: + """ + Synchronous context manager for message streaming. + + Provides: + - Context manager protocol + - Access to MessageStream with helpers + """ + def __enter__(self) -> MessageStream: + """Enter context and return stream.""" + ... + + def __exit__(self, exc_type, exc_val, exc_tb) -> None: + """Exit context and cleanup.""" + ... + +class AsyncMessageStreamManager: + """ + Asynchronous context manager for message streaming. + + Provides: + - Async context manager protocol + - Access to AsyncMessageStream with helpers + """ + async def __aenter__(self) -> AsyncMessageStream: + """Enter async context and return stream.""" + ... + + async def __aexit__(self, exc_type, exc_val, exc_tb) -> None: + """Exit async context and cleanup.""" + ... +``` + +## Base Stream Classes + +For advanced use cases requiring raw SSE event access. + +```python { .api } +class Stream(Generic[T]): + """ + Base synchronous stream for raw SSE events. + + Provides: + - Raw event iteration + - Response access + """ + def __iter__(self) -> Iterator[T]: + """Iterate over stream items.""" + ... + + def __enter__(self) -> Stream[T]: + """Context manager entry.""" + ... + + def __exit__(self, exc_type, exc_val, exc_tb) -> None: + """Context manager exit.""" + ... + + @property + def response(self) -> httpx.Response: + """Access raw HTTP response.""" + ... + +class AsyncStream(Generic[T]): + """ + Base asynchronous stream for raw SSE events. + + Provides: + - Async raw event iteration + - Response access + """ + def __aiter__(self) -> AsyncIterator[T]: + """Async iterate over stream items.""" + ... + + async def __aenter__(self) -> AsyncStream[T]: + """Async context manager entry.""" + ... + + async def __aexit__(self, exc_type, exc_val, exc_tb) -> None: + """Async context manager exit.""" + ... + + @property + def response(self) -> httpx.Response: + """Access raw HTTP response.""" + ... 
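+
+# Note: passing stream=True to client.messages.create(...) returns one of these
+# raw stream wrappers (Stream of raw SSE events for the sync client, AsyncStream
+# for the async client) rather than the MessageStream helpers above.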
+``` + +## Stream Events + +```python { .api } +MessageStreamEvent = Union[ + MessageStartEvent, # Stream started + MessageDeltaEvent, # Usage or stop_reason updated + MessageStopEvent, # Stream completed + ContentBlockStartEvent, # New content block begins + ContentBlockDeltaEvent, # Content delta received + ContentBlockStopEvent, # Content block completed +] +``` + +### Message Lifecycle Events + +```python { .api } +class MessageStartEvent(BaseModel): + """ + Stream started with initial message metadata. + + First event in every stream. Contains message skeleton with empty content. + + Attributes: + type: Always "message_start" + message: Initial Message object with id, role, model, but empty content + """ + type: Literal["message_start"] + message: Message + +class MessageDeltaEvent(BaseModel): + """ + Message metadata updated (stop_reason or usage). + + Sent near end of stream when stop_reason is determined or final usage + statistics are available. + + Attributes: + type: Always "message_delta" + delta: Changed message fields (stop_reason, stop_sequence) + usage: Updated token usage (output_tokens) + """ + type: Literal["message_delta"] + delta: MessageDelta + usage: MessageDeltaUsage + +class MessageDelta(BaseModel): + """ + Changed message fields in MessageDeltaEvent. + + Attributes: + stop_reason: Why generation stopped - "end_turn", "max_tokens", + "stop_sequence", or "tool_use" + stop_sequence: Stop sequence that triggered completion (if applicable) + """ + stop_reason: StopReason | None + stop_sequence: str | None + +class MessageDeltaUsage(BaseModel): + """ + Token usage update in MessageDeltaEvent. + + Attributes: + output_tokens: Total output tokens generated so far + """ + output_tokens: int + +class MessageStopEvent(BaseModel): + """ + Stream completed successfully. + + Final event in every successful stream. After this event, stream is closed + and final message is available via get_final_message(). + + Attributes: + type: Always "message_stop" + """ + type: Literal["message_stop"] +``` + +### Content Block Events + +```python { .api } +class ContentBlockStartEvent(BaseModel): + """ + New content block started in response. + + Sent when Claude begins generating a new content block (text or tool_use). + Contains initial empty block structure. + + Attributes: + type: Always "content_block_start" + index: Zero-based index of this content block in message.content list + content_block: Initial ContentBlock (TextBlock or ToolUseBlock) with + empty/default values + """ + type: Literal["content_block_start"] + index: int + content_block: ContentBlock + +class ContentBlockDeltaEvent(BaseModel): + """ + Content block received incremental update. + + Most frequent event type. Contains incremental content (text deltas or + JSON deltas for tool inputs). + + Attributes: + type: Always "content_block_delta" + index: Zero-based index of content block being updated + delta: Delta content - TextDelta for text blocks, InputJSONDelta for + tool_use blocks + """ + type: Literal["content_block_delta"] + index: int + delta: ContentBlockDelta + +ContentBlockDelta = Union[TextDelta, InputJSONDelta] + +class TextDelta(BaseModel): + """ + Text content delta for TextBlock. + + Attributes: + type: Always "text_delta" + text: Incremental text to append to current text block + """ + type: Literal["text_delta"] + text: str + +class InputJSONDelta(BaseModel): + """ + Tool input JSON delta for ToolUseBlock. 
+ + Attributes: + type: Always "input_json_delta" + partial_json: Incremental JSON string to append. May be incomplete JSON + until block completes. + """ + type: Literal["input_json_delta"] + partial_json: str + +class ContentBlockStopEvent(BaseModel): + """ + Content block completed. + + Sent when a content block finishes. After this event, the content block + at the given index is complete. + + Attributes: + type: Always "content_block_stop" + index: Zero-based index of completed content block + """ + type: Literal["content_block_stop"] + index: int +``` + +### Raw Stream Events + +For advanced use cases requiring access to raw SSE events before parsing. + +```python { .api } +RawMessageStreamEvent = Union[ + RawMessageStartEvent, + RawMessageDeltaEvent, + RawMessageStopEvent, + RawContentBlockStartEvent, + RawContentBlockDeltaEvent, + RawContentBlockStopEvent, +] + +class RawMessageStartEvent(BaseModel): + """Raw message start event from SSE.""" + type: Literal["message_start"] + message: Message + +class RawMessageDeltaEvent(BaseModel): + """Raw message delta event from SSE.""" + type: Literal["message_delta"] + delta: dict[str, Any] + usage: MessageDeltaUsage + +class RawMessageStopEvent(BaseModel): + """Raw message stop event from SSE.""" + type: Literal["message_stop"] + +class RawContentBlockStartEvent(BaseModel): + """Raw content block start from SSE.""" + type: Literal["content_block_start"] + index: int + content_block: dict[str, Any] + +class RawContentBlockDeltaEvent(BaseModel): + """Raw content block delta from SSE.""" + type: Literal["content_block_delta"] + index: int + delta: dict[str, Any] + +class RawContentBlockStopEvent(BaseModel): + """Raw content block stop from SSE.""" + type: Literal["content_block_stop"] + index: int +``` + +## Quick Examples + +### Basic Streaming + +```python +with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "Write a short story"}] +) as stream: + for text in stream.text_stream: + print(text, end="", flush=True) +print() +``` + +### Stream All Events + +```python +with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "Hello"}] +) as stream: + for event in stream: + if event.type == "message_start": + print(f"Message started: {event.message.id}") + elif event.type == "content_block_start": + print(f"Content block {event.index} started") + elif event.type == "content_block_delta": + if event.delta.type == "text_delta": + print(event.delta.text, end="", flush=True) + elif event.type == "content_block_stop": + print(f"\nContent block {event.index} stopped") + elif event.type == "message_delta": + print(f"Stop reason: {event.delta.stop_reason}") + elif event.type == "message_stop": + print("Message stopped") +``` + +### Get Final Message + +```python +with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "What is 2+2?"}] +) as stream: + for text in stream.text_stream: + print(text, end="", flush=True) + +message = stream.get_final_message() +print(f"\nTotal tokens: {message.usage.output_tokens}") +``` + +### Stream with Tool Use + +```python +with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + tools=[ + { + "name": "get_weather", + "description": "Get weather for a location", + "input_schema": { + "type": "object", + "properties": { + "location": {"type": "string"} + }, + "required": ["location"], 
+ }, + } + ], + messages=[ + {"role": "user", "content": "What's the weather in San Francisco?"} + ], +) as stream: + for event in stream: + if event.type == "content_block_start": + if event.content_block.type == "tool_use": + print(f"Tool call: {event.content_block.name}") + elif event.type == "content_block_delta": + if event.delta.type == "input_json_delta": + print(event.delta.partial_json, end="") + +message = stream.get_final_message() + +# Process tool calls +for block in message.content: + if block.type == "tool_use": + print(f"Tool: {block.name}") + print(f"Input: {block.input}") +``` + +### Async Streaming + +```python +import asyncio +from anthropic import AsyncAnthropic + +async def main(): + client = AsyncAnthropic() + async with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "Write a haiku"}] + ) as stream: + async for text in stream.text_stream: + print(text, end="", flush=True) + print() + +asyncio.run(main()) +``` + +### Get Final Text Only + +```python +with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "Hello"}] +) as stream: + # Consume stream + for _ in stream: + pass + +# Get accumulated text +text = stream.get_final_text() +print(text) +``` + +### Stream with Current Snapshot + +```python +with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "Count to 10"}] +) as stream: + for event in stream: + # Get current accumulated message + current = stream.current_message_snapshot + if current.content: + print(f"Current text length: {len(current.content[0].text)}") +``` + +### Async Event Processing + +```python +import asyncio +from anthropic import AsyncAnthropic + +async def stream_with_events(): + client = AsyncAnthropic() + + async with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "Hello"}] + ) as stream: + async for event in stream: + print(f"Event: {event.type}") + +asyncio.run(stream_with_events()) +``` + +### Error Handling in Streams + +```python +from anthropic import APIError + +try: + with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "Hello"}] + ) as stream: + for text in stream.text_stream: + print(text, end="", flush=True) +except APIError as e: + print(f"Stream error: {e}") +``` + +### Manual Stream Iteration + +```python +stream_manager = client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "Hello"}] +) + +stream = stream_manager.__enter__() + +try: + for event in stream: + if event.type == "content_block_delta": + if event.delta.type == "text_delta": + print(event.delta.text, end="", flush=True) +finally: + stream_manager.__exit__(None, None, None) +``` + +### Streaming with Temperature + +```python +with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + temperature=0.8, + messages=[{"role": "user", "content": "Write a creative story"}] +) as stream: + for text in stream.text_stream: + print(text, end="", flush=True) +``` + +### Stream Multi-Turn Conversation + +```python +conversation = [ + {"role": "user", "content": "Hi, I'm Alice"}, +] + +with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=conversation, +) as stream: + for text in 
stream.text_stream: + print(text, end="", flush=True) + + message = stream.get_final_message() + +# Add to conversation +conversation.append({ + "role": "assistant", + "content": message.content, +}) +conversation.append({ + "role": "user", + "content": "What's my name?", +}) + +# Continue streaming +with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=conversation, +) as stream: + for text in stream.text_stream: + print(text, end="", flush=True) +``` + +### Access Raw HTTP Response + +```python +with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "Hello"}] +) as stream: + # Access underlying HTTP response + print(f"Request ID: {stream.response.headers.get('request-id')}") + + for text in stream.text_stream: + print(text, end="", flush=True) +``` + +### Concurrent Async Streams + +```python +async def stream_multiple(): + client = AsyncAnthropic() + + async def stream_one(prompt: str): + async with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": prompt}], + ) as stream: + text = await stream.get_final_text() + return text + + results = await asyncio.gather( + stream_one("What is 2+2?"), + stream_one("What is the capital of France?"), + stream_one("What is Python?"), + ) + + for result in results: + print(result) + print("---") + +asyncio.run(stream_multiple()) +``` + +### Track Token Usage During Streaming + +```python +with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "Write a long essay"}] +) as stream: + for event in stream: + if event.type == "message_delta": + print(f"\nTokens so far: {event.usage.output_tokens}") + elif event.type == "content_block_delta": + if event.delta.type == "text_delta": + print(event.delta.text, end="", flush=True) +``` + +## See Also + +- [Messages API](./messages.md) - Core message creation +- [Streaming Guide](../guides/streaming-guide.md) - Advanced streaming patterns +- [Type System](../reference/types.md) - Complete type definitions diff --git a/.tessl/tiles/tessl/pypi-anthropic/docs/api/tools.md b/.tessl/tiles/tessl/pypi-anthropic/docs/api/tools.md new file mode 100644 index 0000000..f51e2e0 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-anthropic/docs/api/tools.md @@ -0,0 +1,423 @@ +# Tool Use API Reference + +Define and use tools (function calling) with automatic schema generation from Python functions. + +## Tool Decorators + +```python { .api } +def beta_tool(func: Callable) -> BetaFunctionTool: + """ + Decorator to create a synchronous tool from Python function. + + Automatically generates JSON schema from function signature, type hints, + and docstring. Supports standard Python types, Optional, Union, List, Dict. + + Parameters: + func: Python function to convert to tool. Must have type hints and docstring. + + Returns: + BetaFunctionTool: Wrapper with auto-generated schema and callable interface + + Example: + @beta_tool + def get_weather(location: str, unit: str = "fahrenheit") -> dict: + '''Get weather for location. + + Args: + location: City and state + unit: Temperature unit + ''' + return {"temp": 72, "condition": "sunny"} + """ + ... + +def beta_async_tool(func: Callable) -> BetaAsyncFunctionTool: + """ + Decorator to create an asynchronous tool from Python async function. + + Automatically generates JSON schema from function signature, type hints, + and docstring. 
Use for tools that perform async I/O operations. + + Parameters: + func: Python async function to convert to tool. Must have type hints and docstring. + + Returns: + BetaAsyncFunctionTool: Wrapper with auto-generated schema and async callable interface + + Example: + @beta_async_tool + async def fetch_url(url: str) -> str: + '''Fetch content from URL. + + Args: + url: URL to fetch + ''' + async with httpx.AsyncClient() as client: + response = await client.get(url) + return response.text + """ + ... +``` + +## Tool Runner + +```python { .api } +def tool_runner( + self, + *, + model: str, + messages: list[MessageParam], + max_tokens: int, + tools: list[ToolParam | BetaFunctionTool], + tool_choice: ToolChoice = NOT_GIVEN, + **kwargs +) -> Iterator[BetaMessage]: + """ + Run message with automatic tool execution and conversation management. + + Automatically handles the agentic loop: sends message to Claude, executes any + tool calls, sends results back, and continues until Claude responds without + tool use (stop_reason == "end_turn"). + + Parameters: + model: Model identifier (required) + messages: Initial conversation messages (required) + max_tokens: Maximum tokens per Claude response (required) + tools: List of available tools - ToolParam dicts or BetaFunctionTool/BetaAsyncFunctionTool + decorated functions + tool_choice: Control tool selection behavior - "auto" (default), "any", "none", + or specific tool name + **kwargs: Additional parameters passed to messages.create() (system, temperature, etc.) + + Yields: + BetaMessage: Message after each turn in the conversation loop. Final message + will have stop_reason "end_turn" when conversation completes. + + Raises: + Same exceptions as messages.create() + ToolExecutionError: If tool execution fails and error handling doesn't catch it + + Example: + for message in client.beta.messages.tool_runner( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + tools=[get_weather, search_db], + messages=[{"role": "user", "content": "What's the weather?"}] + ): + if message.stop_reason == "end_turn": + print(message.content[0].text) + """ + ... + +async def tool_runner( + self, + *, + model: str, + messages: list[MessageParam], + max_tokens: int, + tools: list[ToolParam | BetaAsyncFunctionTool], + tool_choice: ToolChoice = NOT_GIVEN, + **kwargs +) -> AsyncIterator[BetaMessage]: + """ + Async version of tool_runner. + + Same parameters and behavior as synchronous tool_runner(), but executes + asynchronously and supports async tools (BetaAsyncFunctionTool). + + Yields: + BetaMessage: Message after each turn in the conversation loop + """ + ... +``` + +## Tool Types + +### ToolParam + +```python { .api } +class ToolParam(TypedDict): + """ + Manual tool definition without decorator. + + Fields: + name: Tool name (alphanumeric + underscores only). Used by Claude to identify tool. + description: Clear description of what the tool does. Claude uses this to decide + when to use the tool. + input_schema: JSON Schema (draft 2020-12) defining tool parameters. Use standard + JSON Schema types: object, string, number, boolean, array, etc. + cache_control: Optional prompt caching. Use {"type": "ephemeral"} to cache tool + definition across requests. 
+ """ + name: str + description: str + input_schema: dict[str, Any] + cache_control: NotRequired[CacheControlEphemeral] +``` + +### ToolChoice + +```python { .api } +ToolChoice = Union[ + ToolChoiceAuto, # Let Claude decide whether to use tools (default) + ToolChoiceAny, # Force Claude to use at least one tool + ToolChoiceNone, # Disable all tool use for this request + ToolChoiceTool, # Force Claude to use specific tool by name +] + +class ToolChoiceAuto(TypedDict): + """ + Let Claude decide whether to use tools based on conversation context. + + Fields: + type: Always "auto" + disable_parallel_tool_use: Set to True to force sequential tool calls. + Default is False (parallel calls allowed). + """ + type: Literal["auto"] + disable_parallel_tool_use: NotRequired[bool] + +class ToolChoiceAny(TypedDict): + """ + Force Claude to use at least one tool (any tool from available tools). + + Useful when you want to ensure Claude performs an action rather than just + responding with text. + + Fields: + type: Always "any" + disable_parallel_tool_use: Set to True to force sequential tool calls + """ + type: Literal["any"] + disable_parallel_tool_use: NotRequired[bool] + +class ToolChoiceNone(TypedDict): + """ + Disable all tool use for this request. + + Claude will respond with text only, even if tools are provided. Useful for + forcing text responses in multi-turn conversations. + + Fields: + type: Always "none" + """ + type: Literal["none"] + +class ToolChoiceTool(TypedDict): + """ + Force Claude to use a specific tool by name. + + Claude will always use this tool in its first response. Useful for + deterministic workflows. + + Fields: + type: Always "tool" + name: Exact name of tool to use (must match tool in tools list) + disable_parallel_tool_use: Set to True to force only this tool call + """ + type: Literal["tool"] + name: str + disable_parallel_tool_use: NotRequired[bool] +``` + +### Tool Function Classes + +```python { .api } +class BetaFunctionTool: + """ + Synchronous function tool wrapper created by @beta_tool decorator. + + Attributes: + name: Tool name derived from function name + description: Tool description from function docstring + input_schema: Auto-generated JSON Schema from function signature + func: Underlying Python function + """ + name: str + description: str + input_schema: dict[str, Any] + func: Callable + + def __call__(self, **kwargs) -> Any: + """Execute tool with parameters.""" + ... + + def to_param(self) -> ToolParam: + """Convert to ToolParam dict for API calls.""" + ... + +class BetaAsyncFunctionTool: + """ + Asynchronous function tool wrapper created by @beta_async_tool decorator. + + Attributes: + name: Tool name derived from function name + description: Tool description from function docstring + input_schema: Auto-generated JSON Schema from function signature + func: Underlying Python async function + """ + name: str + description: str + input_schema: dict[str, Any] + func: Callable + + async def __call__(self, **kwargs) -> Any: + """Execute async tool with parameters.""" + ... + + def to_param(self) -> ToolParam: + """Convert to ToolParam dict for API calls.""" + ... 
+``` + +## Quick Examples + +### Manual Tool Definition + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + tools=[{ + "name": "get_weather", + "description": "Get weather for a location", + "input_schema": { + "type": "object", + "properties": { + "location": {"type": "string", "description": "City and state"} + }, + "required": ["location"] + } + }], + messages=[{"role": "user", "content": "What's the weather in SF?"}] +) + +for block in message.content: + if block.type == "tool_use": + print(f"Tool: {block.name}, Input: {block.input}") +``` + +### Using Tool Decorator + +```python +from anthropic import beta_tool + +@beta_tool +def get_weather(location: str, unit: str = "fahrenheit") -> dict: + """ + Get weather for a location. + + Args: + location: City and state, e.g. San Francisco, CA + unit: Temperature unit (celsius or fahrenheit) + """ + return {"location": location, "temperature": 72, "unit": unit, "condition": "sunny"} + +message = client.beta.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + tools=[get_weather], + messages=[{"role": "user", "content": "What's the weather in NYC?"}] +) +``` + +### Tool Runner with Auto-Execution + +```python +@beta_tool +def get_weather(location: str) -> dict: + """Get weather for location.""" + return {"temp": 72, "condition": "sunny"} + +@beta_tool +def search_database(query: str) -> list: + """Search database.""" + return [{"id": 1, "name": "Result 1"}] + +for message in client.beta.messages.tool_runner( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + tools=[get_weather, search_database], + messages=[{"role": "user", "content": "What's the weather in SF?"}] +): + if message.stop_reason == "end_turn": + print(message.content[0].text) +``` + +### Handle Tool Call Manually + +```python +# First request - Claude requests tool use +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + tools=[...], + messages=[{"role": "user", "content": "What's the weather in Paris?"}] +) + +# Extract and execute tool call +tool_use = next(block for block in message.content if block.type == "tool_use") +weather_data = get_weather(location=tool_use.input["location"]) + +# Send result back +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + tools=[...], + messages=[ + {"role": "user", "content": "What's the weather in Paris?"}, + {"role": "assistant", "content": message.content}, + { + "role": "user", + "content": [{ + "type": "tool_result", + "tool_use_id": tool_use.id, + "content": str(weather_data) + }] + } + ] +) +``` + +### Force Tool Use + +```python +# Force any tool +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + tools=[...], + tool_choice={"type": "any"}, + messages=[{"role": "user", "content": "Hello"}] +) + +# Force specific tool +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + tools=[...], + tool_choice={"type": "tool", "name": "get_weather"}, + messages=[{"role": "user", "content": "Hello"}] +) +``` + +### Disable Parallel Tool Use + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + tools=[...], + tool_choice={ + "type": "auto", + "disable_parallel_tool_use": True + }, + messages=[{"role": "user", "content": "Get weather for NYC and LA"}] +) +``` + +## See Also + +- [Messages API](./messages.md) - Core message creation +- [Tool Usage 
Guide](../guides/tool-usage.md) - Advanced tool patterns +- [Type System](../reference/types.md) - Complete type definitions diff --git a/.tessl/tiles/tessl/pypi-anthropic/docs/beta/batches.md b/.tessl/tiles/tessl/pypi-anthropic/docs/beta/batches.md new file mode 100644 index 0000000..8de22d8 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-anthropic/docs/beta/batches.md @@ -0,0 +1,611 @@ +# Beta Message Batches + +Process multiple beta message requests in batch mode with support for all beta features including thinking, citations, web search, code execution, and more. + +## Overview + +Beta message batches extend standard message batches with support for beta features. They provide: +- 50% cost reduction compared to standard API +- Support for all beta features (thinking, citations, web search, etc.) +- Same interface as standard batches +- Asynchronous processing of thousands of requests + +## API Reference + +### Create Batch + +```python { .api } +def create( + self, + *, + requests: Iterable[Request], + betas: list[AnthropicBetaParam] = NOT_GIVEN, + **kwargs +) -> BetaMessageBatch: + """ + Create a batch of beta message requests. + + Parameters: + requests: List of beta message creation requests + Each request includes custom_id and params dict + betas: Additional beta features to enable beyond default batch support + + Returns: + BetaMessageBatch with batch ID and processing status + """ + ... + +async def create( + self, + **kwargs +) -> BetaMessageBatch: + """Async version of create.""" + ... +``` + +### Retrieve Batch + +```python { .api } +def retrieve( + self, + message_batch_id: str, + *, + betas: list[AnthropicBetaParam] = NOT_GIVEN, + **kwargs +) -> BetaMessageBatch: + """ + Retrieve beta message batch status and metadata. + + Parameters: + message_batch_id: ID of the batch to retrieve + betas: Optional beta features header + + Returns: + BetaMessageBatch with current processing status + """ + ... + +async def retrieve( + self, + message_batch_id: str, + **kwargs +) -> BetaMessageBatch: + """Async version of retrieve.""" + ... +``` + +### List Batches + +```python { .api } +def list( + self, + *, + after_id: str = NOT_GIVEN, + before_id: str = NOT_GIVEN, + limit: int = NOT_GIVEN, + betas: list[AnthropicBetaParam] = NOT_GIVEN, + **kwargs +) -> SyncPage[BetaMessageBatch]: + """ + List beta message batches with pagination. + + Parameters: + after_id: Return batches after this ID + before_id: Return batches before this ID + limit: Maximum number of batches to return (1-1000, default 20) + betas: Optional beta features header + + Returns: + Paginated list of BetaMessageBatch objects + """ + ... + +def list( + self, + **kwargs +) -> AsyncPaginator[BetaMessageBatch, AsyncPage[BetaMessageBatch]]: + """Async version of list.""" + ... +``` + +### Cancel Batch + +```python { .api } +def cancel( + self, + message_batch_id: str, + *, + betas: list[AnthropicBetaParam] = NOT_GIVEN, + **kwargs +) -> BetaMessageBatch: + """ + Cancel a beta message batch before processing completes. + + Parameters: + message_batch_id: ID of the batch to cancel + betas: Optional beta features header + + Returns: + BetaMessageBatch with canceling status + """ + ... + +async def cancel( + self, + message_batch_id: str, + **kwargs +) -> BetaMessageBatch: + """Async version of cancel.""" + ... 
+``` + +### Delete Batch + +```python { .api } +def delete( + self, + message_batch_id: str, + *, + betas: list[AnthropicBetaParam] = NOT_GIVEN, + **kwargs +) -> BetaDeletedMessageBatch: + """ + Delete a completed beta message batch. + + Batches must be finished processing before deletion. + Cancel in-progress batches first if needed. + + Parameters: + message_batch_id: ID of the batch to delete + betas: Optional beta features header + + Returns: + BetaDeletedMessageBatch confirming deletion + """ + ... + +async def delete( + self, + message_batch_id: str, + **kwargs +) -> BetaDeletedMessageBatch: + """Async version of delete.""" + ... +``` + +### Get Results + +```python { .api } +def results( + self, + message_batch_id: str, + *, + betas: list[AnthropicBetaParam] = NOT_GIVEN, + **kwargs +) -> JSONLDecoder[BetaMessageBatchIndividualResponse]: + """ + Stream beta message batch results as JSONL. + + Each line contains one request's result with custom_id for matching. + Results order is not guaranteed to match request order. + + Parameters: + message_batch_id: ID of the batch + betas: Optional beta features header + + Returns: + JSONLDecoder streaming individual responses + """ + ... + +async def results( + self, + message_batch_id: str, + **kwargs +) -> AsyncJSONLDecoder[BetaMessageBatchIndividualResponse]: + """Async version of results.""" + ... +``` + +## Examples + +### Basic Batch with Beta Features + +```python +batch = client.beta.messages.batches.create( + requests=[ + { + "custom_id": "request-1", + "params": { + "model": "claude-sonnet-4-5-20250929", + "max_tokens": 2048, + "thinking": {"type": "enabled"}, + "messages": [ + {"role": "user", "content": "Solve this problem..."} + ], + }, + }, + { + "custom_id": "request-2", + "params": { + "model": "claude-sonnet-4-5-20250929", + "max_tokens": 2048, + "web_search": {"type": "enabled"}, + "messages": [ + {"role": "user", "content": "What are the latest news?"} + ], + }, + } + ] +) + +print(f"Batch ID: {batch.id}") +``` + +### Wait for Completion and Get Results + +```python +import time + +# Wait for completion +while True: + batch = client.beta.messages.batches.retrieve(batch.id) + if batch.processing_status == "ended": + break + print(f"Processing: {batch.request_counts.processing} requests remaining") + time.sleep(60) + +# Process results +for response in client.beta.messages.batches.results(batch.id): + if response.result.type == "succeeded": + print(f"{response.custom_id}: Success") + # Access beta feature content + for block in response.result.message.content: + if block.type == "thinking": + print(f" Reasoning: {block.thinking[:100]}...") + elif block.type == "text": + print(f" Response: {block.text[:100]}...") + elif response.result.type == "errored": + print(f"{response.custom_id}: Error - {response.result.error.message}") +``` + +### Mixed Beta Features in Batch + +```python +# Create batch with different features per request +requests = [ + { + "custom_id": "thinking-1", + "params": { + "model": "claude-sonnet-4-5-20250929", + "max_tokens": 4096, + "thinking": {"type": "enabled", "budget_tokens": 2000}, + "messages": [{"role": "user", "content": "Complex reasoning task"}] + } + }, + { + "custom_id": "citations-1", + "params": { + "model": "claude-sonnet-4-5-20250929", + "max_tokens": 2048, + "citations": {"type": "enabled"}, + "messages": [{ + "role": "user", + "content": [ + {"type": "document", "source": {"type": "base64", "media_type": "application/pdf", "data": pdf_data}}, + {"type": "text", "text": "Summarize with 
citations"} + ] + }] + } + }, + { + "custom_id": "web-search-1", + "params": { + "model": "claude-sonnet-4-5-20250929", + "max_tokens": 2048, + "web_search": {"type": "enabled"}, + "messages": [{"role": "user", "content": "Latest AI news"}] + } + }, + { + "custom_id": "combined-1", + "params": { + "model": "claude-sonnet-4-5-20250929", + "max_tokens": 4096, + "thinking": {"type": "enabled"}, + "web_search": {"type": "enabled"}, + "code_execution": {"type": "enabled"}, + "messages": [{"role": "user", "content": "Research and code solution"}] + } + } +] + +batch = client.beta.messages.batches.create(requests=requests) +``` + +### Large-Scale Beta Batch + +```python +# Process thousands of requests with beta features +requests = [] +for i in range(5000): + requests.append({ + "custom_id": f"request-{i}", + "params": { + "model": "claude-sonnet-4-5-20250929", + "max_tokens": 2048, + "thinking": {"type": "enabled"}, + "messages": [{"role": "user", "content": f"Task {i}: ..."}] + } + }) + +batch = client.beta.messages.batches.create(requests=requests) +print(f"Created batch {batch.id} with {len(requests)} requests") + +# Monitor progress +while True: + batch = client.beta.messages.batches.retrieve(batch.id) + completed = batch.request_counts.succeeded + batch.request_counts.errored + total = completed + batch.request_counts.processing + progress = (completed / total * 100) if total > 0 else 0 + + print(f"Progress: {progress:.1f}% ({completed}/{total})") + + if batch.processing_status == "ended": + break + time.sleep(60) + +# Process all results +results_by_id = {} +for response in client.beta.messages.batches.results(batch.id): + results_by_id[response.custom_id] = response.result + +print(f"Processed {len(results_by_id)} results") +``` + +### Async Batch Operations + +```python +import asyncio +from anthropic import AsyncAnthropic + +async def main(): + client = AsyncAnthropic() + + # Create batch + batch = await client.beta.messages.batches.create( + requests=[ + { + "custom_id": "async-1", + "params": { + "model": "claude-sonnet-4-5-20250929", + "max_tokens": 2048, + "thinking": {"type": "enabled"}, + "messages": [{"role": "user", "content": "Task 1"}] + } + }, + { + "custom_id": "async-2", + "params": { + "model": "claude-sonnet-4-5-20250929", + "max_tokens": 2048, + "web_search": {"type": "enabled"}, + "messages": [{"role": "user", "content": "Task 2"}] + } + } + ] + ) + + # Poll until complete + while True: + batch = await client.beta.messages.batches.retrieve(batch.id) + if batch.processing_status == "ended": + break + await asyncio.sleep(60) + + # Get results + results = [] + async for response in client.beta.messages.batches.results(batch.id): + results.append(response) + + return results + +results = asyncio.run(main()) +``` + +### Cancel Batch + +```python +# Cancel batch in progress +batch = client.beta.messages.batches.cancel("batch_abc123") +print(f"Status: {batch.processing_status}") # "canceling" + +# Wait for cancellation to complete +while True: + batch = client.beta.messages.batches.retrieve(batch.id) + if batch.processing_status == "ended": + break + time.sleep(30) + +print(f"Canceled: {batch.request_counts.canceled} requests") +``` + +### Delete Batch + +```python +# Delete completed batch +deleted = client.beta.messages.batches.delete("batch_abc123") +print(f"Deleted batch: {deleted.id}") +``` + +### List All Batches + +```python +# List recent batches +for batch in client.beta.messages.batches.list(limit=10): + print(f"{batch.id}: {batch.processing_status}") + print(f" 
Succeeded: {batch.request_counts.succeeded}") + print(f" Errored: {batch.request_counts.errored}") + print(f" Processing: {batch.request_counts.processing}") +``` + +### Error Handling + +```python +from anthropic import APIError + +try: + batch = client.beta.messages.batches.create(requests=[...]) +except APIError as e: + print(f"Failed to create batch: {e.message}") + +# Process results with error handling +for response in client.beta.messages.batches.results(batch.id): + if response.result.type == "succeeded": + try: + # Process successful result + message = response.result.message + ... + except Exception as e: + print(f"Error processing {response.custom_id}: {e}") + elif response.result.type == "errored": + print(f"Request {response.custom_id} failed: {response.result.error.message}") + elif response.result.type == "canceled": + print(f"Request {response.custom_id} was canceled") + elif response.result.type == "expired": + print(f"Request {response.custom_id} expired") +``` + +## Best Practices + +### 1. Use Meaningful Custom IDs + +```python +# Good - descriptive IDs +custom_id = f"user-{user_id}-task-{task_id}-{timestamp}" + +# Bad - generic IDs +custom_id = f"request-{i}" +``` + +### 2. Batch Similar Requests + +Group requests with similar beta features: + +```python +# Batch 1: Thinking-heavy tasks +thinking_requests = [...] + +# Batch 2: Web search tasks +search_requests = [...] + +batch1 = client.beta.messages.batches.create(requests=thinking_requests) +batch2 = client.beta.messages.batches.create(requests=search_requests) +``` + +### 3. Monitor Progress + +```python +def monitor_batch(batch_id): + """Monitor batch with progress updates.""" + last_progress = 0 + + while True: + batch = client.beta.messages.batches.retrieve(batch_id) + + total = sum([ + batch.request_counts.processing, + batch.request_counts.succeeded, + batch.request_counts.errored, + batch.request_counts.canceled + ]) + completed = batch.request_counts.succeeded + batch.request_counts.errored + progress = (completed / total * 100) if total > 0 else 0 + + if progress > last_progress + 5: # Log every 5% + print(f"Progress: {progress:.1f}%") + last_progress = progress + + if batch.processing_status == "ended": + break + + time.sleep(60) + + return batch + +batch = monitor_batch("batch_abc123") +``` + +### 4. Handle All Result Types + +```python +success_count = 0 +error_count = 0 +canceled_count = 0 +expired_count = 0 + +for response in client.beta.messages.batches.results(batch.id): + if response.result.type == "succeeded": + success_count += 1 + process_success(response) + elif response.result.type == "errored": + error_count += 1 + log_error(response) + elif response.result.type == "canceled": + canceled_count += 1 + elif response.result.type == "expired": + expired_count += 1 + +print(f"Results: {success_count} success, {error_count} errors, {canceled_count} canceled, {expired_count} expired") +``` + +### 5. Batch Size Considerations + +- Maximum 10,000 requests per batch +- Consider splitting very large workloads +- Balance batch size with monitoring needs + +```python +def create_batches_chunked(requests, chunk_size=5000): + """Split large request list into multiple batches.""" + batches = [] + + for i in range(0, len(requests), chunk_size): + chunk = requests[i:i+chunk_size] + batch = client.beta.messages.batches.create(requests=chunk) + batches.append(batch) + + return batches + +all_requests = [...] 
# 20,000 requests +batches = create_batches_chunked(all_requests) +``` + +## Limitations and Considerations + +### Beta Feature Support +- All beta features supported in batches +- Same limitations apply as in standard requests +- Token budgets apply per request + +### Processing Time +- Batches typically complete within 24 hours +- Large batches may take longer +- Complexity affects processing time + +### Cost Optimization +- 50% cost reduction vs standard API +- Beta features may have additional costs +- Calculate total cost including beta feature usage + +### Rate Limits +- Batch requests don't count against rate limits +- Batch creation has its own limits +- Monitor batch creation rate + +## See Also + +- [Beta Overview](./index.md) - Overview of all beta features +- [Message Features](./message-features.md) - Beta feature documentation +- [Standard Batches](../api/batches.md) - Standard batch processing +- [Messages API](../api/messages.md) - Core message creation diff --git a/.tessl/tiles/tessl/pypi-anthropic/docs/beta/files.md b/.tessl/tiles/tessl/pypi-anthropic/docs/beta/files.md new file mode 100644 index 0000000..0dc9a33 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-anthropic/docs/beta/files.md @@ -0,0 +1,669 @@ +# Beta Files API + +Upload and manage files for use in conversations and batch processing. + +## Overview + +The Files API enables you to upload, manage, and download files for use with Claude. Files can be used for document analysis, batch processing input, or general user data. + +## Key Features + +- Upload files with specified purpose (batch or user_data) +- Retrieve file metadata +- List uploaded files with pagination +- Download file content +- Delete files when no longer needed + +## Files API + +### Upload File + +```python { .api } +def upload( + self, + *, + file: FileTypes, + purpose: Literal["batch", "user_data"], + **kwargs +) -> FileMetadata: + """ + Upload file. + + Parameters: + file: File to upload (bytes, file path, or file object) + purpose: Purpose of file + "batch" - For batch processing requests/results + "user_data" - For general document analysis and user data + + Returns: + FileMetadata with file ID, size, created timestamp, and purpose + """ + ... +``` + +### Retrieve File Metadata + +```python { .api } +def retrieve( + self, + file_id: str, + **kwargs +) -> FileMetadata: + """ + Get file metadata. + + Parameters: + file_id: Unique identifier for the file + + Returns: + FileMetadata with file details (ID, filename, size, purpose, created_at) + """ + ... +``` + +### List Files + +```python { .api } +def list( + self, + *, + before_id: str = NOT_GIVEN, + after_id: str = NOT_GIVEN, + limit: int = NOT_GIVEN, + **kwargs +) -> SyncPage[FileMetadata]: + """ + List uploaded files with pagination. + + Parameters: + before_id: Return files before this ID (for reverse pagination) + after_id: Return files after this ID (for forward pagination) + limit: Maximum number of files to return (default varies by API) + + Returns: + SyncPage[FileMetadata] with paginated file list + """ + ... +``` + +### Delete File + +```python { .api } +def delete( + self, + file_id: str, + **kwargs +) -> DeletedFile: + """ + Delete a file. + + Parameters: + file_id: Unique identifier for the file to delete + + Returns: + DeletedFile confirming deletion + """ + ... +``` + +### Download File + +```python { .api } +def download( + self, + file_id: str, + **kwargs +) -> bytes: + """ + Download file content. 
+ + Parameters: + file_id: Unique identifier for the file + + Returns: + bytes: Raw file content + """ + ... +``` + +## Examples + +### Upload File from Path + +```python +from anthropic import Anthropic +from anthropic._utils import file_from_path + +client = Anthropic() + +# Upload PDF for document analysis +file = file_from_path("document.pdf") +uploaded = client.beta.files.upload( + file=file, + purpose="user_data" +) + +print(f"Uploaded: {uploaded.id}") +print(f"Filename: {uploaded.filename}") +print(f"Size: {uploaded.size} bytes") +print(f"Created: {uploaded.created_at}") +``` + +### Upload File from Bytes + +```python +# Upload file from bytes +file_content = b"Sample document content..." +uploaded = client.beta.files.upload( + file=("document.txt", file_content), + purpose="user_data" +) + +print(f"Uploaded file ID: {uploaded.id}") +``` + +### Upload for Batch Processing + +```python +import json + +# Prepare batch requests +batch_requests = [ + { + "custom_id": "request-1", + "params": { + "model": "claude-sonnet-4-5-20250929", + "max_tokens": 1024, + "messages": [{"role": "user", "content": "Hello"}] + } + } +] + +# Convert to JSONL format +jsonl_content = "\n".join(json.dumps(req) for req in batch_requests) + +# Upload batch file +batch_file = client.beta.files.upload( + file=("batch_requests.jsonl", jsonl_content.encode()), + purpose="batch" +) + +print(f"Batch file uploaded: {batch_file.id}") +``` + +### Retrieve File Metadata + +```python +# Get file details +file_metadata = client.beta.files.retrieve("file_abc123") + +print(f"ID: {file_metadata.id}") +print(f"Filename: {file_metadata.filename}") +print(f"Size: {file_metadata.size} bytes") +print(f"Purpose: {file_metadata.purpose}") +print(f"Created: {file_metadata.created_at}") +``` + +### List All Files + +```python +# List all uploaded files +for file in client.beta.files.list(): + print(f"{file.filename} ({file.id}) - {file.purpose}") +``` + +### List Files with Pagination + +```python +# List files with limit +page1 = client.beta.files.list(limit=10) +for file in page1: + print(f"File: {file.filename}") + +# Get next page +if page1.has_next_page(): + page2 = client.beta.files.list(limit=10, after_id=page1.data[-1].id) + for file in page2: + print(f"File: {file.filename}") + +# List files in reverse +recent_files = client.beta.files.list(limit=5) +older_files = client.beta.files.list( + limit=5, + before_id=recent_files.data[0].id +) +``` + +### Download File + +```python +# Download file content +file_id = "file_abc123" +content = client.beta.files.download(file_id) + +# Save to disk +with open("downloaded_file.pdf", "wb") as f: + f.write(content) + +print(f"Downloaded {len(content)} bytes") +``` + +### Delete File + +```python +# Delete file +deleted = client.beta.files.delete("file_abc123") +print(f"Deleted file: {deleted.id}") +print(f"Deleted: {deleted.deleted}") +``` + +### Use File in Message + +```python +# Upload document +file = file_from_path("research_paper.pdf") +uploaded = client.beta.files.upload( + file=file, + purpose="user_data" +) + +# Use file in message with citations +message = client.beta.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=2048, + citations={"type": "enabled"}, + messages=[ + { + "role": "user", + "content": [ + { + "type": "document", + "source": { + "type": "base64", + "media_type": "application/pdf", + "data": uploaded.id # Use file ID + } + }, + { + "type": "text", + "text": "Summarize this document with citations" + } + ] + } + ] +) + +# Process citations +for 
block in message.content: + if hasattr(block, 'citations'): + for citation in block.citations: + print(f"Citation: {citation.cited_text}") +``` + +### Manage Multiple Files + +```python +from anthropic._utils import file_from_path + +# Upload multiple files +files_to_upload = [ + "document1.pdf", + "document2.pdf", + "data.csv" +] + +uploaded_files = [] +for filepath in files_to_upload: + file = file_from_path(filepath) + uploaded = client.beta.files.upload( + file=file, + purpose="user_data" + ) + uploaded_files.append(uploaded) + print(f"Uploaded {uploaded.filename}: {uploaded.id}") + +# Track uploaded files +file_ids = [f.id for f in uploaded_files] +print(f"Uploaded {len(file_ids)} files: {file_ids}") + +# Later: clean up files +for file_id in file_ids: + client.beta.files.delete(file_id) + print(f"Deleted {file_id}") +``` + +### Async File Operations + +```python +import asyncio +from anthropic import AsyncAnthropic +from anthropic._utils import file_from_path + +async def main(): + client = AsyncAnthropic() + + # Upload file + file = file_from_path("document.pdf") + uploaded = await client.beta.files.upload( + file=file, + purpose="user_data" + ) + print(f"Uploaded: {uploaded.id}") + + # List files + files = await client.beta.files.list(limit=10) + async for file in files: + print(f"File: {file.filename}") + + # Download file + content = await client.beta.files.download(uploaded.id) + print(f"Downloaded {len(content)} bytes") + + # Delete file + deleted = await client.beta.files.delete(uploaded.id) + print(f"Deleted: {deleted.id}") + +asyncio.run(main()) +``` + +### Error Handling + +```python +from anthropic import APIError, NotFoundError, BadRequestError + +# Upload with error handling +try: + file = file_from_path("large_document.pdf") + uploaded = client.beta.files.upload( + file=file, + purpose="user_data" + ) + print(f"Uploaded: {uploaded.id}") +except BadRequestError as e: + if "size" in str(e).lower(): + print(f"File too large: {e.message}") + else: + print(f"Invalid file: {e.message}") +except APIError as e: + print(f"Upload failed: {e.message}") + +# Download with error handling +try: + content = client.beta.files.download("file_abc123") + with open("output.pdf", "wb") as f: + f.write(content) +except NotFoundError: + print("File not found") +except APIError as e: + print(f"Download failed: {e.message}") + +# Delete with validation +file_id = "file_abc123" +try: + # Check if file exists + file_metadata = client.beta.files.retrieve(file_id) + print(f"Deleting {file_metadata.filename}") + + # Delete file + deleted = client.beta.files.delete(file_id) + print(f"Deleted: {deleted.deleted}") +except NotFoundError: + print(f"File {file_id} not found") +except APIError as e: + print(f"Deletion failed: {e.message}") +``` + +### File Lifecycle Management + +```python +class FileManager: + """Manage file uploads with automatic cleanup.""" + + def __init__(self, client): + self.client = client + self.uploaded_files = [] + + def upload(self, filepath, purpose="user_data"): + """Upload file and track it.""" + file = file_from_path(filepath) + uploaded = self.client.beta.files.upload( + file=file, + purpose=purpose + ) + self.uploaded_files.append(uploaded.id) + return uploaded + + def cleanup(self): + """Delete all tracked files.""" + for file_id in self.uploaded_files: + try: + self.client.beta.files.delete(file_id) + print(f"Deleted {file_id}") + except APIError as e: + print(f"Failed to delete {file_id}: {e.message}") + self.uploaded_files.clear() + +# Usage +manager = 
FileManager(client) + +try: + # Upload files + file1 = manager.upload("doc1.pdf") + file2 = manager.upload("doc2.pdf") + + # Use files... + print(f"Using files: {file1.id}, {file2.id}") + +finally: + # Always cleanup + manager.cleanup() +``` + +### Check File Before Use + +```python +def safe_file_operation(file_id): + """Safely perform operations on a file.""" + try: + # Verify file exists + metadata = client.beta.files.retrieve(file_id) + + # Check file properties + if metadata.purpose != "user_data": + print(f"Warning: File has purpose '{metadata.purpose}'") + + # Download if needed + content = client.beta.files.download(file_id) + print(f"File size: {len(content)} bytes") + + return content + + except NotFoundError: + print(f"File {file_id} not found") + return None + except APIError as e: + print(f"Error: {e.message}") + return None + +# Use safely +content = safe_file_operation("file_abc123") +if content: + # Process content + pass +``` + +## File Purposes + +### batch + +Used for batch processing operations: +- Batch request files (JSONL format) +- Batch result files +- Structured data for bulk operations + +Example: +```python +batch_file = client.beta.files.upload( + file=("requests.jsonl", jsonl_data), + purpose="batch" +) +``` + +### user_data + +Used for general document analysis and conversation: +- PDF documents +- Text files +- Images +- CSV data +- Any user-provided content + +Example: +```python +doc_file = client.beta.files.upload( + file=file_from_path("document.pdf"), + purpose="user_data" +) +``` + +## Best Practices + +### 1. Use Appropriate Purpose + +Choose the right purpose for your use case: +```python +# For batch processing +client.beta.files.upload(file=batch_data, purpose="batch") + +# For document analysis +client.beta.files.upload(file=document, purpose="user_data") +``` + +### 2. Track Uploaded Files + +Keep track of file IDs for cleanup: +```python +uploaded_ids = [] +for doc in documents: + uploaded = client.beta.files.upload(file=doc, purpose="user_data") + uploaded_ids.append(uploaded.id) + +# Later cleanup +for file_id in uploaded_ids: + client.beta.files.delete(file_id) +``` + +### 3. Handle Errors Gracefully + +Always handle file operation errors: +```python +try: + uploaded = client.beta.files.upload(file=large_file, purpose="user_data") +except BadRequestError as e: + # Handle validation errors (size, format, etc.) + handle_validation_error(e) +except APIError as e: + # Handle other API errors + handle_api_error(e) +``` + +### 4. Clean Up Unused Files + +Delete files when no longer needed: +```python +# After processing +client.beta.files.delete(file_id) +``` + +### 5. Use file_from_path Helper + +Leverage the SDK helper for file uploads: +```python +from anthropic._utils import file_from_path + +# Automatically handles file reading and metadata +file = file_from_path("document.pdf") +uploaded = client.beta.files.upload(file=file, purpose="user_data") +``` + +### 6. Validate Before Upload + +Check file properties before uploading: +```python +import os + +def validate_and_upload(filepath, max_size_mb=10): + """Validate file before upload.""" + if not os.path.exists(filepath): + raise ValueError(f"File not found: {filepath}") + + size_mb = os.path.getsize(filepath) / (1024 * 1024) + if size_mb > max_size_mb: + raise ValueError(f"File too large: {size_mb:.1f}MB (max {max_size_mb}MB)") + + return client.beta.files.upload( + file=file_from_path(filepath), + purpose="user_data" + ) +``` + +### 7. 
Pagination for Large Lists + +Use pagination for many files: +```python +def list_all_files(client): + """List all files with pagination.""" + all_files = [] + after_id = None + + while True: + page = client.beta.files.list(limit=100, after_id=after_id) + all_files.extend(page.data) + + if not page.has_next_page(): + break + + after_id = page.data[-1].id + + return all_files +``` + +## Limitations and Considerations + +### File Size Limits + +- Check API documentation for current file size limits +- Limits may vary by purpose (batch vs user_data) +- Consider splitting large files if possible + +### Supported Formats + +- PDFs (for document analysis) +- Text files (plain text, markdown, etc.) +- Images (PNG, JPEG, etc.) +- JSONL (for batch processing) +- CSV and other structured data formats + +### File Retention + +- Files persist until explicitly deleted +- No automatic cleanup +- Monitor storage usage + +### Purpose-Specific Constraints + +- batch: Must be valid JSONL format for batch requests +- user_data: General content, validated by API + +### Rate Limits + +- File upload operations count against rate limits +- Large files may take longer to process +- Consider rate limiting for bulk uploads + +## See Also + +- [Beta Overview](./index.md) - Overview of all beta features +- [Skills API](./skills.md) - Create and manage reusable skills +- [Beta Batches](./batches.md) - Batch processing with files +- [Message Features](./message-features.md) - Using files in messages with citations diff --git a/.tessl/tiles/tessl/pypi-anthropic/docs/beta/index.md b/.tessl/tiles/tessl/pypi-anthropic/docs/beta/index.md new file mode 100644 index 0000000..907b45c --- /dev/null +++ b/.tessl/tiles/tessl/pypi-anthropic/docs/beta/index.md @@ -0,0 +1,358 @@ +# Beta Features Overview + +Access experimental capabilities through the `client.beta` namespace including extended thinking, citations, web search, code execution, computer use, and more. + +## What Are Beta Features? + +Beta features provide experimental functionality that may change in future releases. They enable advanced capabilities beyond standard message creation. 
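
For orientation, the snippet below sketches how the resources covered in this overview hang off the `client.beta` namespace. Accessor paths are the ones used in the examples throughout these docs; the skills and models paths are assumptions based on the sections that describe them.

```python
from anthropic import Anthropic

client = Anthropic()

# Accessor paths used throughout this overview:
client.beta.messages            # create(), stream(), count_tokens(), tool_runner(), parse()
client.beta.messages.batches    # beta message batches: create, retrieve, list, cancel, delete, results
client.beta.files               # file upload, download, list, delete
# client.beta.skills            # skills management (assumed path; see skills.md)
# client.beta.models            # model info with beta support (assumed path; see Beta Models API below)
```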
+ +## Available Beta Features + +### Message Enhancement Features + +Add advanced capabilities to message creation: + +- **[Extended Thinking](./message-features.md#extended-thinking)** - Enable Claude to show detailed reasoning with configurable token budget +- **[Citations](./message-features.md#citations)** - Source attribution for responses when working with documents +- **[Web Search](./message-features.md#web-search)** - Real-time web information retrieval +- **[Code Execution](./message-features.md#code-execution)** - Python code execution in secure sandbox +- **[Computer Use](./message-features.md#computer-use)** - GUI interaction capabilities (screenshots, mouse, keyboard) +- **[Bash Commands](./message-features.md#bash-commands)** - Shell command execution +- **[Text Editor](./message-features.md#text-editor)** - Text file editing capabilities +- **[MCP Integration](./message-features.md#mcp-integration)** - Model Context Protocol tool integration +- **[Memory Tools](./message-features.md#memory-tools)** - Persistent memory across conversations + +**[→ Message Features Documentation](./message-features.md)** + +### Resource APIs + +Manage resources and process messages at scale: + +- **[Skills API](./skills.md)** - Create and manage reusable capabilities with version control +- **[Files API](./files.md)** - Upload and manage files for use in conversations +- **[Beta Message Batches](./batches.md)** - Process multiple beta messages asynchronously with all features + +## Quick Start + +### Enable Extended Thinking + +```python +from anthropic import Anthropic + +client = Anthropic() + +message = client.beta.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=4096, + thinking={"type": "enabled", "budget_tokens": 2000}, + messages=[ + {"role": "user", "content": "Solve this complex problem: ..."} + ] +) + +# Access thinking and response +for block in message.content: + if block.type == "thinking": + print(f"Reasoning: {block.thinking}") + elif block.type == "text": + print(f"Answer: {block.text}") +``` + +### Combine Multiple Features + +```python +message = client.beta.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=4096, + thinking={"type": "enabled"}, + web_search={"type": "enabled"}, + citations={"type": "enabled"}, + messages=[ + { + "role": "user", + "content": [ + { + "type": "document", + "source": { + "type": "base64", + "media_type": "application/pdf", + "data": pdf_data + } + }, + {"type": "text", "text": "Analyze this paper and find related research"} + ] + } + ] +) +``` + +## Beta Messages API + +The beta messages API extends the standard messages API with additional parameters. 
+ +```python { .api } +def create( + self, + *, + model: str, + messages: list[BetaMessageParam], + max_tokens: int, + # Standard parameters + system: str | list[BetaTextBlockParam] = NOT_GIVEN, + metadata: MetadataParam = NOT_GIVEN, + stop_sequences: list[str] = NOT_GIVEN, + stream: bool = False, + temperature: float = NOT_GIVEN, + top_p: float = NOT_GIVEN, + top_k: int = NOT_GIVEN, + tools: list[BetaToolParam] = NOT_GIVEN, + tool_choice: BetaToolChoice = NOT_GIVEN, + # Beta feature parameters + thinking: ThinkingConfigParam = NOT_GIVEN, + citations: CitationsConfigParam = NOT_GIVEN, + web_search: WebSearchConfigParam = NOT_GIVEN, + code_execution: CodeExecutionConfigParam = NOT_GIVEN, + bash: BashConfigParam = NOT_GIVEN, + text_editor: TextEditorConfigParam = NOT_GIVEN, + computer_use: ComputerUseConfigParam = NOT_GIVEN, + mcp: MCPConfigParam = NOT_GIVEN, + memory: MemoryConfigParam = NOT_GIVEN, + context: ContextConfigParam = NOT_GIVEN, + **kwargs +) -> BetaMessage: + """ + Create message with beta features. + + All standard message parameters are supported, plus beta feature configurations. + Multiple beta features can be enabled simultaneously. + + Returns: + BetaMessage with beta content blocks + """ + ... + +async def create(self, **kwargs) -> BetaMessage: + """Async version of create.""" + ... +``` + +### Streaming + +```python { .api } +def stream( + self, + **kwargs +) -> BetaMessageStreamManager: + """Stream message with beta features.""" + ... + +def stream(self, **kwargs) -> BetaAsyncMessageStreamManager: + """Async version of stream.""" + ... +``` + +### Count Tokens + +```python { .api } +def count_tokens( + self, + *, + model: str, + messages: list[BetaMessageParam], + system: str | list[BetaTextBlockParam] = NOT_GIVEN, + tools: list[BetaToolParam] = NOT_GIVEN, + tool_choice: BetaToolChoice = NOT_GIVEN, + **kwargs +) -> BetaMessageTokensCount: + """Count tokens for beta message.""" + ... +``` + +### Tool Runner + +```python { .api } +def tool_runner( + self, + *, + model: str, + messages: list[BetaMessageParam], + max_tokens: int, + tools: list[BetaToolParam | BetaFunctionTool], + **kwargs +) -> Iterator[BetaMessage]: + """Automatically execute tools in beta messages.""" + ... +``` + +## Beta Models API + +Retrieve model information with beta feature support. + +```python { .api } +def retrieve( + self, + model_id: str, + **kwargs +) -> BetaModelInfo: + """Get information about a specific model.""" + ... + +def list( + self, + *, + before_id: str = NOT_GIVEN, + after_id: str = NOT_GIVEN, + limit: int = NOT_GIVEN, + **kwargs +) -> SyncPage[BetaModelInfo]: + """List available models with pagination.""" + ... +``` + +## Architecture + +Beta features are organized into three categories: + +### 1. Message Enhancement Features +Parameters that enhance `client.beta.messages.create()`: +- Enable advanced reasoning (thinking) +- Add real-time information (web search) +- Enable code execution +- Support document citations +- Enable computer interaction + +**[→ Full Documentation](./message-features.md)** + +### 2. Batch Processing +Process multiple beta messages with all features: +- Create batches with thinking, citations, web search, etc. +- Same interface as standard batches +- 50% cost reduction + +**[→ Batches Documentation](./batches.md)** + +### 3. 
Resource Management +Manage files and skills: +- Upload files for document analysis +- Create reusable skills +- Version control for skills + +**[→ Skills Documentation](./skills.md)** | **[→ Files Documentation](./files.md)** + +## Feature Comparison + +| Feature | Standard API | Beta API | +|---------|-------------|----------| +| Basic messaging | ✓ | ✓ | +| Streaming | ✓ | ✓ | +| Tool use | ✓ | ✓ | +| Extended thinking | ✗ | ✓ | +| Citations | ✗ | ✓ | +| Web search | ✗ | ✓ | +| Code execution | ✗ | ✓ | +| Computer use | ✗ | ✓ | +| Batches | ✓ | ✓ (with beta features) | +| Skills management | ✗ | ✓ | +| File management | ✗ | ✓ | + +## Important Notes + +### Stability and Changes +- **Beta features may change without notice** +- APIs may be modified or removed +- Breaking changes possible between releases +- Not recommended for production-critical features + +### Availability +- Not all features available in all regions +- Some features require specific model versions +- Check current documentation for availability + +### Usage and Costs +- Rate limits may differ for beta features +- Beta features may have additional usage costs +- Token budgets (e.g., thinking) are approximate +- Web search adds latency to requests + +### Technical Limitations +- Citations work best with document inputs (PDFs, text files) +- Code execution runs in isolated sandbox with limited packages +- Computer use requires display configuration and has security restrictions +- Web search requires internet connectivity +- Skills must include a SKILL.md file at the root +- File uploads have size limits (check API documentation) + +### Best Practices +- Test beta features in development before deploying +- Handle feature deprecation gracefully +- Monitor for API changes and updates +- Use appropriate error handling for experimental features +- Check feature availability before use + +## Migration from Standard API + +Most code using the standard API can be upgraded to beta with minimal changes: + +```python +# Standard API +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "Hello"}] +) + +# Beta API (backward compatible) +message = client.beta.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "Hello"}] +) + +# Beta API (with features) +message = client.beta.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + thinking={"type": "enabled"}, # Add beta features + messages=[{"role": "user", "content": "Hello"}] +) +``` + +## Documentation Structure + +### Message Enhancement Features +**[message-features.md](./message-features.md)** - Comprehensive guide to all message enhancement parameters: +- Extended thinking with budget control +- Citations configuration and usage +- Web search integration +- Code execution in sandbox +- Computer use capabilities +- Bash, text editor, MCP, memory tools +- Combined features examples +- Streaming with beta features + +### Resource APIs +**[batches.md](./batches.md)** - Beta message batches: +- Create batches with beta features +- Retrieve, list, cancel, delete operations +- Process results with JSONL decoder +- All beta features supported in batches + +**[skills.md](./skills.md)** - Skills management: +- Create and manage reusable skills +- Version control for skills +- List, retrieve, delete operations +- Skill file requirements + +**[files.md](./files.md)** - File management: +- Upload files for document analysis +- 
Download and list files +- Delete file operations +- File size limits and formats + +## See Also + +- [Messages API](../api/messages.md) - Core message creation +- [Tool Use API](../api/tools.md) - Tool integration +- [Streaming API](../api/streaming.md) - Streaming responses +- [Batches API](../api/batches.md) - Standard batch processing +- [Getting Started](../guides/getting-started.md) - Basic SDK usage diff --git a/.tessl/tiles/tessl/pypi-anthropic/docs/beta/message-features.md b/.tessl/tiles/tessl/pypi-anthropic/docs/beta/message-features.md new file mode 100644 index 0000000..be7157a --- /dev/null +++ b/.tessl/tiles/tessl/pypi-anthropic/docs/beta/message-features.md @@ -0,0 +1,1134 @@ +# Beta Message Enhancement Features + +Message enhancement features are parameters that add advanced capabilities to `client.beta.messages.create()`. Multiple features can be combined in a single request. + +## Extended Thinking + +Enable Claude to show detailed reasoning process with configurable token budget. + +### API + +```python { .api } +class ThinkingConfigParam(TypedDict): + """ + Extended thinking configuration. + + Fields: + type: "enabled" or "disabled" + budget_tokens: Maximum tokens for thinking (optional) + """ + type: Literal["enabled", "disabled"] + budget_tokens: NotRequired[int] + +class ThinkingBlock(BaseModel): + """ + Thinking content block in response. + + Attributes: + type: Always "thinking" + thinking: The reasoning content + """ + type: Literal["thinking"] + thinking: str + +class RedactedThinkingBlock(BaseModel): + """ + Redacted thinking block (when thinking disabled mid-conversation). + + Attributes: + type: Always "redacted_thinking" + """ + type: Literal["redacted_thinking"] +``` + +### Example + +```python +from anthropic import Anthropic + +client = Anthropic() + +message = client.beta.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=4096, + thinking={ + "type": "enabled", + "budget_tokens": 2000, + }, + messages=[ + { + "role": "user", + "content": "Solve this complex math problem: ..." + } + ] +) + +# Check for thinking blocks +for block in message.content: + if block.type == "thinking": + print("Claude's reasoning:") + print(block.thinking) + elif block.type == "text": + print("\nFinal answer:") + print(block.text) +``` + +### Notes + +- Token budgets are approximate and may be exceeded slightly +- Thinking blocks appear before text blocks in the response +- Disabling thinking mid-conversation results in redacted blocks +- Works with all model versions that support beta features + +## Structured Outputs + +Enable type-safe, validated responses by specifying a Pydantic model schema. Claude will generate output that conforms to your schema and the SDK will automatically parse and validate it. + +### API + +```python { .api } +from typing import Generic, TypeVar, Optional +from pydantic import BaseModel + +ResponseFormatT = TypeVar('ResponseFormatT', bound=BaseModel) + +class ParsedBetaMessage(BetaMessage, Generic[ResponseFormatT]): + """ + Beta message response with parsed structured output. + + Attributes: + content: List of content blocks including ParsedBetaTextBlock + parsed_output: Convenience property to access the parsed Pydantic model + """ + content: list[ParsedBetaContentBlock[ResponseFormatT]] + + @property + def parsed_output(self) -> Optional[ResponseFormatT]: + """Extract the first parsed output from text content blocks.""" + ... 
+ +class ParsedBetaTextBlock(BetaTextBlock, Generic[ResponseFormatT]): + """ + Text content block with parsed structured output. + + Attributes: + type: Always "text" + text: Raw JSON text + parsed_output: Validated Pydantic model instance + """ + type: Literal["text"] + text: str + parsed_output: Optional[ResponseFormatT] + +def transform_schema( + json_schema: type[BaseModel] | dict[str, Any], +) -> dict[str, Any]: + """ + Transform a Pydantic model or JSON schema for API compatibility. + + Handles: + - Format conversion for supported types + - Property transformations + - Unsupported property documentation + + Args: + json_schema: Pydantic BaseModel class or dict schema + + Returns: + Transformed schema dict compatible with API + """ + ... +``` + +### Basic Example + +```python +import pydantic +from anthropic import Anthropic + +# Define your output schema +class Order(pydantic.BaseModel): + product_name: str + price: float + quantity: int + +client = Anthropic() + +prompt = """ +Extract the product name, price, and quantity from this customer message: +"Hi, I'd like to order 2 packs of Green Tea for 5.50 dollars each." +""" + +# Use parse() method with output_format parameter +parsed_message = client.beta.messages.parse( + model="claude-sonnet-4-5-20250929", + messages=[{"role": "user", "content": prompt}], + max_tokens=1024, + output_format=Order, +) + +# Access parsed output directly +order = parsed_message.parsed_output +print(f"Product: {order.product_name}") +print(f"Price: ${order.price}") +print(f"Quantity: {order.quantity}") +# Output: +# Product: Green Tea +# Price: $5.5 +# Quantity: 2 +``` + +### Streaming Example + +```python +# Stream with structured outputs +with client.beta.messages.stream( + model="claude-sonnet-4-5-20250929", + messages=[{"role": "user", "content": prompt}], + max_tokens=1024, + output_format=Order, +) as stream: + for event in stream: + if event.type == "text": + # Get partial parsed output as it streams + partial = event.parsed_snapshot() + print(f"Partial: {partial}") + +# Get final parsed result +final_message = stream.get_final_message() +order = final_message.parsed_output +``` + +### Complex Schema Example + +```python +from typing import Literal +import pydantic + +class Address(pydantic.BaseModel): + street: str + city: str + postal_code: str + country: str + +class Customer(pydantic.BaseModel): + name: str + email: str + phone: str | None = None + address: Address + +class OrderItem(pydantic.BaseModel): + product_id: str + product_name: str + quantity: int + price: float + +class CompleteOrder(pydantic.BaseModel): + order_id: str + customer: Customer + items: list[OrderItem] + total_amount: float + status: Literal["pending", "confirmed", "shipped", "delivered"] + notes: str | None = None + +# Use with complex nested schema +parsed_message = client.beta.messages.parse( + model="claude-sonnet-4-5-20250929", + messages=[{ + "role": "user", + "content": "Extract order details from: [long email or document]" + }], + max_tokens=2048, + output_format=CompleteOrder, +) + +order = parsed_message.parsed_output +print(f"Order ID: {order.order_id}") +print(f"Customer: {order.customer.name}") +print(f"Items: {len(order.items)}") +for item in order.items: + print(f" - {item.product_name}: ${item.price} x {item.quantity}") +print(f"Total: ${order.total_amount}") +``` + +### Using transform_schema() + +```python +from anthropic import transform_schema + +# Transform a Pydantic model +class MyModel(pydantic.BaseModel): + name: str + age: int = 
pydantic.Field(ge=0, le=150, description="Age in years") + +# Get transformed schema +schema = transform_schema(MyModel) +print(schema) +# Transforms Pydantic schema to API-compatible format +# Unsupported constraints (ge, le) are moved to description + +# Or transform a dict schema directly +schema = transform_schema({ + "type": "integer", + "minimum": 1, + "maximum": 10, + "description": "A number" +}) +# Returns: {'type': 'integer', 'description': 'A number\n\n{minimum: 1, maximum: 10}'} +``` + +### Async Example + +```python +import asyncio +from anthropic import AsyncAnthropic + +async def main(): + client = AsyncAnthropic() + + parsed_message = await client.beta.messages.parse( + model="claude-sonnet-4-5-20250929", + messages=[{"role": "user", "content": prompt}], + max_tokens=1024, + output_format=Order, + ) + + order = parsed_message.parsed_output + print(f"Extracted: {order}") + +asyncio.run(main()) +``` + +### Async Streaming Example + +```python +async def main(): + client = AsyncAnthropic() + + async with client.beta.messages.stream( + model="claude-sonnet-4-5-20250929", + messages=[{"role": "user", "content": prompt}], + max_tokens=1024, + output_format=Order, + ) as stream: + async for event in stream: + if event.type == "text": + partial = event.parsed_snapshot() + print(f"Partial: {partial}") + + final_message = await stream.get_final_message() + order = final_message.parsed_output + +asyncio.run(main()) +``` + +### Notes + +- Requires models that support structured outputs (e.g., claude-sonnet-4-5-20250929) +- Uses Pydantic for schema definition and validation +- The `parse()` method is a convenience wrapper around `create()` that handles schema transformation +- Automatically validates output against your schema +- Raises `pydantic.ValidationError` if output doesn't match schema +- In streaming mode, `parsed_snapshot()` provides incremental parsing +- The feature automatically adds `"structured-outputs-2025-11-13"` beta header +- Supported formats: date-time, time, date, duration, email, hostname, uri, ipv4, ipv6, uuid +- Unsupported Pydantic constraints (ge, le, gt, lt, etc.) are documented in schema description + +## Context Management + +Automatically manage conversation context by clearing thinking blocks or tool uses when limits are approached. 
+ +### API + +```python { .api } +class BetaContextManagementConfigParam(TypedDict, total=False): + edits: list[Union[BetaClearToolUses20250919EditParam, BetaClearThinking20251015EditParam]] + +class BetaClearThinking20251015EditParam(TypedDict, total=False): + type: Literal["clear_thinking_20251015"] + keep: Union[int, Literal["all"]] # Number of recent turns to keep thinking blocks + +class BetaClearToolUses20250919EditParam(TypedDict, total=False): + type: Literal["clear_tool_uses_20250919"] + trigger: Union[BetaInputTokensTriggerParam, BetaToolUsesTriggerParam] + keep: BetaToolUsesKeepParam + clear_at_least: Optional[BetaInputTokensClearAtLeastParam] + clear_tool_inputs: Union[bool, list[str], None] + exclude_tools: Optional[list[str]] +``` + +### Example + +```python +message = client.beta.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + thinking={"type": "enabled"}, + context_management={ + "edits": [ + {"type": "clear_thinking_20251015", "keep": 3}, # Keep last 3 turns + { + "type": "clear_tool_uses_20250919", + "trigger": {"type": "input_tokens", "value": 100000}, + "keep": {"type": "turns", "value": 5}, + "clear_tool_inputs": True, + } + ] + }, + messages=conversation_history, +) +``` + +## Container Support + +Enable container-based file operations for beta features. + +### API + +```python { .api } +class BetaContainerParams(TypedDict, total=False): + # Container configuration parameters (check API docs for details) + pass + +class BetaContainerUploadBlock(BaseModel): + type: Literal["container_upload"] + # Upload result information +``` + +## Search Tools + +Enable document search capabilities with BM25 or regex patterns. + +### API + +```python { .api } +class BetaToolSearchToolBM2520251119Param(TypedDict, total=False): + """BM25 search tool for document retrieval.""" + type: Literal["search_tool_bm25_20251119"] + name: str + +class BetaToolSearchToolRegex20251119Param(TypedDict, total=False): + """Regex pattern search tool.""" + type: Literal["search_tool_regex_20251119"] + name: str +``` + +### Example + +```python +message = client.beta.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + tools=[ + {"type": "search_tool_bm25_20251119", "name": "document_search"}, + ], + messages=[{"role": "user", "content": "Search for relevant passages about AI"}] +) +``` + +## Citations + +Enable source attribution for responses when working with documents. + +### API + +```python { .api } +class CitationsConfigParam(TypedDict): + """ + Citations configuration. + + Fields: + type: "enabled" or "disabled" + """ + type: Literal["enabled", "disabled"] + +class TextCitation(BaseModel): + """ + Citation in text content. 
+ + Attributes: + type: Always "text_citation" + text: Cited text + cited_text: Original source text + location: Location in source + """ + type: Literal["text_citation"] + text: str + cited_text: str + location: CitationCharLocation | CitationContentBlockLocation | CitationPageLocation +``` + +### Example + +```python +message = client.beta.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=2048, + citations={"type": "enabled"}, + messages=[ + { + "role": "user", + "content": [ + { + "type": "document", + "source": { + "type": "base64", + "media_type": "application/pdf", + "data": pdf_data, + }, + }, + { + "type": "text", + "text": "Summarize this document with citations.", + }, + ], + } + ] +) + +# Extract citations +for block in message.content: + if hasattr(block, 'citations'): + for citation in block.citations: + print(f"Citation: {citation.cited_text}") + print(f"Location: {citation.location}") +``` + +### Notes + +- Works best with document inputs (PDFs, text files) +- Citations reference specific locations in source documents +- Multiple citations can appear in a single response +- Location types: character position, content block, or page number + +## Web Search + +Enable real-time web information retrieval. + +### API + +```python { .api } +class WebSearchConfigParam(TypedDict): + """ + Web search configuration. + + Fields: + type: "enabled" or "disabled" + """ + type: Literal["enabled", "disabled"] + +class WebSearchResultBlock(BaseModel): + """ + Web search result in response. + + Attributes: + type: Always "web_search_result" + url: Result URL + title: Page title + snippet: Text snippet + """ + type: Literal["web_search_result"] + url: str + title: str + snippet: str +``` + +### Example + +```python +message = client.beta.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=2048, + web_search={"type": "enabled"}, + messages=[ + { + "role": "user", + "content": "What are the latest AI developments in 2025?" + } + ] +) + +# Check for web search results +for block in message.content: + if block.type == "web_search_result": + print(f"Found: {block.title}") + print(f"URL: {block.url}") + print(f"Snippet: {block.snippet}") +``` + +### Notes + +- Requires internet connectivity +- Adds latency to requests (typically 1-3 seconds) +- Results are current at time of request +- May not be available in all regions + +## Code Execution + +Enable Python code execution in secure sandbox environment. + +### API + +```python { .api } +class CodeExecutionConfigParam(TypedDict): + """ + Code execution configuration. + + Fields: + type: "enabled" or "disabled" + """ + type: Literal["enabled", "disabled"] +``` + +### Example + +```python +message = client.beta.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=2048, + code_execution={"type": "enabled"}, + messages=[ + { + "role": "user", + "content": "Calculate the first 10 Fibonacci numbers." + } + ] +) +``` + +### Notes + +- Runs in isolated sandbox with limited packages +- Security restrictions prevent file system access +- Network access is disabled +- Execution timeout applies (typically 30 seconds) +- Available packages: NumPy, Pandas, Matplotlib, etc. (check docs) + +## Computer Use + +Enable computer interaction capabilities including screenshots, mouse, and keyboard control. + +### API + +```python { .api } +class ComputerUseConfigParam(TypedDict): + """ + Computer use configuration. 
+ + Fields: + type: "enabled" or "disabled" + display_width_px: Display width in pixels + display_height_px: Display height in pixels + display_number: Display number (optional) + """ + type: Literal["enabled", "disabled"] + display_width_px: NotRequired[int] + display_height_px: NotRequired[int] + display_number: NotRequired[int] +``` + +### Example + +```python +message = client.beta.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=2048, + computer_use={ + "type": "enabled", + "display_width_px": 1920, + "display_height_px": 1080, + }, + messages=[ + { + "role": "user", + "content": "Open a web browser and search for Python tutorials." + } + ] +) +``` + +### Notes + +- Requires display configuration +- Security restrictions apply +- Not available in all regions +- Use with caution - can interact with GUI +- Suitable for automation and testing scenarios + +## Bash Commands + +Enable bash command execution. + +### API + +```python { .api } +class BashConfigParam(TypedDict): + """ + Bash configuration. + + Fields: + type: "enabled" or "disabled" + """ + type: Literal["enabled", "disabled"] + +class ToolBash20250124(TypedDict): + """ + Bash tool definition. + + Fields: + type: Always "bash_20250124" + name: Tool name + """ + type: Literal["bash_20250124"] + name: str +``` + +### Notes + +- Executes shell commands in sandboxed environment +- Security restrictions apply +- Limited access to system resources +- Use with caution in production + +## Text Editor + +Enable text file editing capabilities. + +### API + +```python { .api } +class TextEditorConfigParam(TypedDict): + """ + Text editor configuration. + + Fields: + type: "enabled" or "disabled" + """ + type: Literal["enabled", "disabled"] + +class ToolTextEditor20250124(TypedDict): + """ + Text editor tool (latest version). + + Fields: + type: Always "text_editor_20250124" + name: Tool name + """ + type: Literal["text_editor_20250124"] + name: str +``` + +### Notes + +- Supports text file creation and editing +- Works in sandboxed environment +- File operations are temporary unless persisted +- Suitable for code generation and modification + +## MCP Integration + +Enable Model Context Protocol tool integration for custom external tools. 
+ +### API + +```python { .api } +class BetaRequestMCPServerURLDefinitionParam(TypedDict, total=False): + """MCP server URL definition.""" + url: Required[str] + """URL of the MCP server""" + +class BetaMCPToolsetParam(TypedDict, total=False): + """MCP toolset configuration.""" + servers: list[BetaRequestMCPServerURLDefinitionParam] + """List of MCP server definitions""" + +class BetaMCPToolConfigParam(TypedDict, total=False): + """MCP tool configuration.""" + # Tool-specific configuration + +class BetaMCPToolUseBlock(BaseModel): + """MCP tool use in response.""" + type: Literal["mcp_tool_use"] + # MCP tool execution information + +class BetaMCPToolResultBlock(BaseModel): + """MCP tool result in response.""" + type: Literal["mcp_tool_result"] + # MCP tool result data +``` + +### Example + +```python +message = client.beta.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + mcp_servers=[ + {"url": "http://localhost:8080/mcp"}, + ], + messages=[ + {"role": "user", "content": "Use the MCP tools to fetch data"} + ] +) + +# Check for MCP tool uses +for block in message.content: + if block.type == "mcp_tool_use": + print(f"MCP tool used: {block}") + elif block.type == "mcp_tool_result": + print(f"MCP result: {block}") +``` + +### Notes + +- Integrates external tools via Model Context Protocol +- Requires running MCP server(s) +- Enables custom tool integration beyond built-in tools +- Supports multiple MCP servers simultaneously +- Advanced feature for specialized use cases + +## Memory Tools + +Enable persistent memory across conversations. + +### API + +```python { .api } +class MemoryConfigParam(TypedDict): + """ + Memory configuration. + + Fields: + type: "enabled" or "disabled" + """ + type: Literal["enabled", "disabled"] + +class BetaAbstractMemoryTool: + """ + Abstract base class for memory tools. + + Subclass to implement custom memory backends. + """ + def store(self, key: str, value: Any) -> None: + """Store value in memory.""" + ... + + def retrieve(self, key: str) -> Any: + """Retrieve value from memory.""" + ... + + def delete(self, key: str) -> None: + """Delete value from memory.""" + ... 
+``` + +### Example - Custom Memory Implementation + +```python +# Example custom memory tool implementation +class DatabaseMemoryTool(BetaAbstractMemoryTool): + def __init__(self, db_connection): + self.db = db_connection + + def store(self, key: str, value: Any) -> None: + """Store value in database.""" + self.db.execute("INSERT INTO memory (key, value) VALUES (?, ?)", (key, value)) + + def retrieve(self, key: str) -> Any: + """Retrieve value from database.""" + result = self.db.execute("SELECT value FROM memory WHERE key = ?", (key,)).fetchone() + return result[0] if result else None + + def delete(self, key: str) -> None: + """Delete value from database.""" + self.db.execute("DELETE FROM memory WHERE key = ?", (key,)) + +# Use with beta messages +db_memory = DatabaseMemoryTool(db_conn) + +message = client.beta.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=2048, + memory={"type": "enabled"}, + messages=[ + {"role": "user", "content": "Remember that my favorite color is blue."} + ] +) +``` + +### Notes + +- Enables persistent context across conversations +- Requires custom memory backend implementation +- Useful for multi-turn conversations with state +- Memory persists beyond single request + +## Combined Features Example + +Use multiple beta features together for maximum capability: + +```python +# Use multiple beta features in one request +message = client.beta.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=4096, + thinking={"type": "enabled", "budget_tokens": 2000}, + web_search={"type": "enabled"}, + code_execution={"type": "enabled"}, + citations={"type": "enabled"}, + messages=[ + { + "role": "user", + "content": [ + { + "type": "document", + "source": {"type": "base64", "media_type": "application/pdf", "data": pdf_data} + }, + { + "type": "text", + "text": "Analyze this research paper, search for recent related work, and write Python code to replicate key results. Include citations." 
+ } + ] + } + ] +) + +# Process response with all beta features +for block in message.content: + if block.type == "thinking": + print(f"Reasoning: {block.thinking}") + elif block.type == "text": + print(f"Analysis: {block.text}") + if hasattr(block, 'citations'): + print("Citations:") + for citation in block.citations: + print(f" - {citation.cited_text}") + elif block.type == "web_search_result": + print(f"Related work: {block.title} ({block.url})") +``` + +## Streaming with Beta Features + +Stream responses while using beta features: + +```python +with client.beta.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=2048, + thinking={"type": "enabled"}, + messages=[ + {"role": "user", "content": "Explain quantum computing"} + ] +) as stream: + for event in stream: + if event.type == "content_block_delta": + if event.delta.type == "thinking_delta": + print(f"[Thinking: {event.delta.thinking}]") + elif event.delta.type == "text_delta": + print(event.delta.text, end="", flush=True) + print() + +# Get final message +message = stream.get_final_message() +print(f"\nTokens used: {message.usage.output_tokens}") +``` + +## Async Beta Messages + +Use beta features with async client: + +```python +import asyncio +from anthropic import AsyncAnthropic + +async def main(): + client = AsyncAnthropic() + + message = await client.beta.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=2048, + thinking={"type": "enabled"}, + messages=[ + {"role": "user", "content": "Explain machine learning"} + ] + ) + + for block in message.content: + if block.type == "thinking": + print(f"Thinking: {block.thinking}") + elif block.type == "text": + print(f"Answer: {block.text}") + +asyncio.run(main()) +``` + +## Async Streaming with Beta Features + +Combine async streaming with beta features: + +```python +import asyncio +from anthropic import AsyncAnthropic + +async def main(): + client = AsyncAnthropic() + + async with client.beta.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=2048, + thinking={"type": "enabled"}, + web_search={"type": "enabled"}, + messages=[ + {"role": "user", "content": "What are the latest quantum computing breakthroughs?"} + ] + ) as stream: + async for event in stream: + if event.type == "content_block_delta": + if event.delta.type == "thinking_delta": + print(f"[Thinking: {event.delta.thinking}]") + elif event.delta.type == "text_delta": + print(event.delta.text, end="", flush=True) + print() + +asyncio.run(main()) +``` + +## Tool Runner with Beta Tools + +Use tool runner with beta features: + +```python +from anthropic import beta_tool + +@beta_tool +def search_database(query: str) -> list: + """Search database for query.""" + return [{"id": 1, "title": "Result"}] + +for message in client.beta.messages.tool_runner( + model="claude-sonnet-4-5-20250929", + max_tokens=2048, + tools=[search_database], + messages=[{"role": "user", "content": "Search for Python tutorials"}] +): + if message.stop_reason == "end_turn": + print(message.content[0].text) +``` + +## Best Practices + +### 1. 
Choose Appropriate Features + +Only enable features you need: +```python +# Don't do this (unnecessary features) +message = client.beta.messages.create( + thinking={"type": "enabled"}, + web_search={"type": "enabled"}, + code_execution={"type": "enabled"}, + messages=[{"role": "user", "content": "What is 2+2?"}] # Simple question +) + +# Do this (appropriate features) +message = client.beta.messages.create( + messages=[{"role": "user", "content": "What is 2+2?"}] # No features needed +) +``` + +### 2. Set Reasonable Token Budgets + +For thinking, set budgets appropriate to task complexity: +```python +# Simple task +thinking={"type": "enabled", "budget_tokens": 500} + +# Complex reasoning +thinking={"type": "enabled", "budget_tokens": 2000} +``` + +### 3. Handle Feature-Specific Content + +Check for feature-specific content blocks: +```python +for block in message.content: + if block.type == "thinking": + # Handle reasoning + ... + elif block.type == "web_search_result": + # Handle search result + ... + elif block.type == "text": + # Handle text + ... +``` + +### 4. Test Features Individually + +Test each feature separately before combining: +```python +# Test thinking alone +message1 = client.beta.messages.create( + thinking={"type": "enabled"}, + messages=[...] +) + +# Test web search alone +message2 = client.beta.messages.create( + web_search={"type": "enabled"}, + messages=[...] +) + +# Combine after testing +message3 = client.beta.messages.create( + thinking={"type": "enabled"}, + web_search={"type": "enabled"}, + messages=[...] +) +``` + +### 5. Monitor Costs + +Beta features may have additional costs: +```python +# Track usage +print(f"Input tokens: {message.usage.input_tokens}") +print(f"Output tokens: {message.usage.output_tokens}") +if message.usage.cache_creation_input_tokens: + print(f"Cache creation: {message.usage.cache_creation_input_tokens}") +``` + +## Limitations and Considerations + +### Token Usage +- Extended thinking increases token usage +- Web search adds overhead +- Budget accordingly for production use + +### Latency +- Web search adds 1-3 seconds +- Code execution adds execution time +- Computer use may have delays + +### Availability +- Not all features available in all regions +- Some features require specific models +- Check documentation for current availability + +### Security +- Code execution is sandboxed +- Computer use has security restrictions +- Bash commands are limited +- Use appropriate caution in production + +### Stability +- Beta features may change +- Breaking changes possible +- Monitor for deprecations +- Test after SDK updates + +## See Also + +- [Beta Overview](./index.md) - Overview of all beta features +- [Beta Batches](./batches.md) - Use beta features in batches +- [Messages API](../api/messages.md) - Core message creation +- [Streaming API](../api/streaming.md) - Streaming responses diff --git a/.tessl/tiles/tessl/pypi-anthropic/docs/beta/skills.md b/.tessl/tiles/tessl/pypi-anthropic/docs/beta/skills.md new file mode 100644 index 0000000..a2dcc2d --- /dev/null +++ b/.tessl/tiles/tessl/pypi-anthropic/docs/beta/skills.md @@ -0,0 +1,599 @@ +# Beta Skills API + +Create and manage reusable capabilities with version control. + +## Overview + +The Skills API allows you to create, manage, and version reusable capabilities that can be used across conversations. Skills are defined by uploading files including a SKILL.md file that describes the skill's functionality. 
+ +## Key Features + +- Create skills with file uploads +- Version control for skill evolution +- List and filter skills by source (custom or Anthropic-provided) +- Delete skills and specific versions +- Cursor-based pagination for scalability + +## Skills API + +### Create Skill + +```python { .api } +def create( + self, + *, + display_title: str | None = NOT_GIVEN, + files: list[FileTypes] | None = NOT_GIVEN, + **kwargs +) -> SkillCreateResponse: + """ + Create a skill. + + Parameters: + display_title: Display title for the skill (human-readable label, not included in model prompt) + files: Files to upload for the skill + All files must be in the same top-level directory + Must include a SKILL.md file at the root + + Returns: + SkillCreateResponse object + """ + ... +``` + +### Retrieve Skill + +```python { .api } +def retrieve( + self, + skill_id: str, + **kwargs +) -> SkillRetrieveResponse: + """ + Retrieve skill by ID. + + Parameters: + skill_id: Unique identifier for the skill + + Returns: + SkillRetrieveResponse with skill details + """ + ... +``` + +### List Skills + +```python { .api } +def list( + self, + *, + limit: int = NOT_GIVEN, + page: str | None = NOT_GIVEN, + source: str | None = NOT_GIVEN, + **kwargs +) -> SyncPageCursor[SkillListResponse]: + """ + List skills with cursor-based pagination. + + Parameters: + limit: Number of results per page (max 100, default 20) + page: Pagination token from previous response's next_page field + source: Filter by source ("custom" for user-created, "anthropic" for Anthropic-created) + + Returns: + SyncPageCursor[SkillListResponse] with paginated results + """ + ... +``` + +### Delete Skill + +```python { .api } +def delete( + self, + skill_id: str, + **kwargs +) -> SkillDeleteResponse: + """ + Delete a skill. + + Parameters: + skill_id: Unique identifier for the skill + + Returns: + SkillDeleteResponse confirming deletion + """ + ... +``` + +## Skill Versions API + +Manage versions of existing skills for evolution and rollback capabilities. + +### Create Version + +```python { .api } +def create( + self, + skill_id: str, + *, + files: list[FileTypes] | None = NOT_GIVEN, + **kwargs +) -> VersionCreateResponse: + """ + Create a new version of a skill. + + Parameters: + skill_id: Unique identifier for the skill + files: Files to upload for the skill version + Must include a SKILL.md file at the root + + Returns: + VersionCreateResponse with new version details + """ + ... +``` + +### Retrieve Version + +```python { .api } +def retrieve( + self, + version: str, + *, + skill_id: str, + **kwargs +) -> VersionRetrieveResponse: + """ + Get details about a specific skill version. + + Parameters: + version: Version identifier + skill_id: Unique identifier for the skill + + Returns: + VersionRetrieveResponse with version details + """ + ... +``` + +### List Versions + +```python { .api } +def list( + self, + skill_id: str, + *, + limit: int | None = NOT_GIVEN, + page: str | None = NOT_GIVEN, + **kwargs +) -> SyncPageCursor[VersionListResponse]: + """ + List versions of a skill. + + Parameters: + skill_id: Unique identifier for the skill + limit: Number of items to return per page (default 20, range 1-1000) + page: Pagination token from previous response's next_page field + + Returns: + SyncPageCursor[VersionListResponse] with cursor-paginated versions + """ + ... +``` + +### Delete Version + +```python { .api } +def delete( + self, + version: str, + *, + skill_id: str, + **kwargs +) -> VersionDeleteResponse: + """ + Delete a specific skill version. 
+ + Parameters: + version: Version identifier + skill_id: Unique identifier for the skill + + Returns: + VersionDeleteResponse confirming deletion + """ + ... +``` + +## Examples + +### Create Simple Skill + +```python +from anthropic import Anthropic +from anthropic._utils import file_from_path + +client = Anthropic() + +# Create skill with SKILL.md +skill = client.beta.skills.create( + display_title="Weather Analyzer", + files=[ + file_from_path("SKILL.md"), # Required: skill description + file_from_path("weather.py"), # Optional: implementation files + ] +) + +print(f"Created skill: {skill.id}") +print(f"Display title: {skill.display_title}") +``` + +### SKILL.md Format + +```markdown +# Weather Analyzer + +Analyze weather data and provide recommendations. + +## Capabilities + +- Temperature analysis +- Humidity assessment +- Weather recommendations + +## Usage + +Call this skill with temperature and humidity values to get weather analysis. +``` + +### List Skills + +```python +# List all custom skills +for skill in client.beta.skills.list(source="custom"): + print(f"Skill: {skill.display_title} ({skill.id})") + +# List Anthropic-provided skills +for skill in client.beta.skills.list(source="anthropic"): + print(f"Anthropic skill: {skill.display_title}") + +# Paginated listing +page_result = client.beta.skills.list(limit=10) +for skill in page_result: + print(f"Skill: {skill.display_title}") + +# Get next page +if page_result.next_page: + next_page = client.beta.skills.list(limit=10, page=page_result.next_page) +``` + +### Retrieve Skill + +```python +skill = client.beta.skills.retrieve("skill_abc123") +print(f"Name: {skill.display_title}") +print(f"Created: {skill.created_at}") +print(f"Latest version: {skill.latest_version}") +``` + +### Delete Skill + +```python +# Delete entire skill +deleted = client.beta.skills.delete("skill_abc123") +print(f"Deleted skill: {deleted.id}") +``` + +### Create New Version + +```python +# Create new version with updated files +version = client.beta.skills.versions.create( + skill_id="skill_abc123", + files=[ + file_from_path("SKILL.md"), # Updated description + file_from_path("weather_v2.py"), # New implementation + ] +) + +print(f"Created version: {version.version}") +print(f"Version ID: {version.id}") +``` + +### List Versions + +```python +# List all versions of a skill +for version in client.beta.skills.versions.list(skill_id="skill_abc123"): + print(f"Version: {version.version}") + print(f"Created: {version.created_at}") + print(f"Status: {version.status}") + print("---") + +# Paginated version listing +versions = client.beta.skills.versions.list( + skill_id="skill_abc123", + limit=5 +) + +for version in versions: + print(f"Version: {version.version}") + +# Get next page +if versions.next_page: + next_versions = client.beta.skills.versions.list( + skill_id="skill_abc123", + limit=5, + page=versions.next_page + ) +``` + +### Retrieve Specific Version + +```python +version = client.beta.skills.versions.retrieve( + skill_id="skill_abc123", + version="v1" +) + +print(f"Version: {version.version}") +print(f"Created: {version.created_at}") +``` + +### Delete Version + +```python +# Delete specific version +deleted = client.beta.skills.versions.delete( + skill_id="skill_abc123", + version="v1" +) + +print(f"Deleted version: {deleted.version}") +``` + +### Version Management Pattern + +```python +# Create skill +skill = client.beta.skills.create( + display_title="Data Processor", + files=[file_from_path("SKILL.md"), file_from_path("processor_v1.py")] +) + 
+print(f"Created skill {skill.id} with version {skill.latest_version}") + +# Update skill with new version +v2 = client.beta.skills.versions.create( + skill_id=skill.id, + files=[file_from_path("SKILL.md"), file_from_path("processor_v2.py")] +) + +print(f"Created version {v2.version}") + +# List all versions +versions = list(client.beta.skills.versions.list(skill_id=skill.id)) +print(f"Total versions: {len(versions)}") + +# Rollback by deleting latest version (if needed) +if len(versions) > 1: + latest = versions[0] + client.beta.skills.versions.delete( + skill_id=skill.id, + version=latest.version + ) + print(f"Rolled back to previous version") +``` + +### Multi-File Skill + +```python +# Create skill with multiple files +skill = client.beta.skills.create( + display_title="Advanced Calculator", + files=[ + file_from_path("SKILL.md"), + file_from_path("calculator.py"), + file_from_path("math_utils.py"), + file_from_path("constants.py"), + ] +) + +print(f"Created skill with {len(skill.files)} files") +``` + +### Async Skills Operations + +```python +import asyncio +from anthropic import AsyncAnthropic + +async def main(): + client = AsyncAnthropic() + + # Create skill asynchronously + skill = await client.beta.skills.create( + display_title="Async Processor", + files=[file_from_path("SKILL.md")] + ) + + # List skills + async for skill in client.beta.skills.list(): + print(f"Skill: {skill.display_title}") + + # Create version + version = await client.beta.skills.versions.create( + skill_id=skill.id, + files=[file_from_path("SKILL.md")] + ) + + print(f"Created version: {version.version}") + +asyncio.run(main()) +``` + +### Error Handling + +```python +from anthropic import APIError, BadRequestError + +try: + # Create skill + skill = client.beta.skills.create( + display_title="Test Skill", + files=[file_from_path("SKILL.md")] + ) +except BadRequestError as e: + if "SKILL.md" in str(e): + print("Error: SKILL.md file is required at root") + else: + print(f"Invalid request: {e.message}") +except APIError as e: + print(f"API error: {e.message}") + +# Validate skill exists before operations +try: + skill = client.beta.skills.retrieve("skill_abc123") + print(f"Skill exists: {skill.display_title}") +except APIError: + print("Skill not found") +``` + +## File Requirements + +### SKILL.md File + +- **Required**: Every skill must include a SKILL.md file at the root +- **Purpose**: Describes the skill's capabilities and usage +- **Format**: Markdown file with skill documentation +- **Location**: Must be in the root directory with other skill files + +### File Organization + +- All skill files must be in the same top-level directory +- No subdirectories allowed +- Include all necessary implementation files +- Keep files focused and modular + +### Example Structure + +``` +skill_files/ + ├── SKILL.md # Required: skill description + ├── main.py # Optional: implementation + ├── utils.py # Optional: utilities + └── constants.json # Optional: configuration +``` + +## Best Practices + +### 1. Clear SKILL.md Documentation + +Write comprehensive SKILL.md files: +```markdown +# Skill Name + +Clear one-line description. + +## Capabilities + +- Bullet list of what the skill does +- Specific use cases + +## Usage + +How to invoke and use the skill. + +## Requirements + +Any prerequisites or constraints. +``` + +### 2. Version Management + +- Create new versions for updates instead of deleting and recreating +- Keep version history for rollback capability +- Document changes in SKILL.md for each version + +### 3. 
Naming Conventions + +Use descriptive display titles: +```python +# Good +display_title="Weather Data Analyzer" + +# Bad +display_title="skill1" +``` + +### 4. File Organization + +Keep related functionality together: +```python +files=[ + file_from_path("SKILL.md"), + file_from_path("analyzer.py"), # Main logic + file_from_path("data_utils.py"), # Helper functions + file_from_path("config.json"), # Configuration +] +``` + +### 5. Error Handling + +Always validate operations: +```python +try: + skill = client.beta.skills.retrieve(skill_id) + # Use skill +except APIError: + # Handle missing skill + pass +``` + +### 6. Pagination + +Use cursor pagination for large lists: +```python +page = None +while True: + results = client.beta.skills.list(limit=100, page=page) + for skill in results: + process_skill(skill) + + if not results.next_page: + break + page = results.next_page +``` + +## Limitations and Considerations + +### File Constraints + +- All files must be in same directory (no subdirectories) +- SKILL.md required at root +- File size limits apply (check API documentation) + +### Version Limits + +- Check API for maximum versions per skill +- Old versions remain until explicitly deleted +- Deleting a skill deletes all versions + +### Source Filtering + +- "custom": User-created skills +- "anthropic": Anthropic-provided skills +- Use source filter to separate your skills from system skills + +### Pagination + +- Default page size: 20 +- Maximum page size: 100 (skills), 1000 (versions) +- Use next_page token for cursor-based pagination + +## See Also + +- [Beta Overview](./index.md) - Overview of all beta features +- [Files API](./files.md) - File upload and management +- [Beta Batches](./batches.md) - Batch processing with beta features +- [Message Features](./message-features.md) - Beta message enhancement features diff --git a/.tessl/tiles/tessl/pypi-anthropic/docs/common-tasks/basic-messaging.md b/.tessl/tiles/tessl/pypi-anthropic/docs/common-tasks/basic-messaging.md new file mode 100644 index 0000000..fc961b7 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-anthropic/docs/common-tasks/basic-messaging.md @@ -0,0 +1,336 @@ +# Basic Messaging Tasks + +Practical patterns for common messaging scenarios. For complete API reference, see **[Messages API](../api/messages.md)**. + +## Send Simple Text Message + +```python +from anthropic import Anthropic + +client = Anthropic() # Uses ANTHROPIC_API_KEY env var + +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[ + {"role": "user", "content": "Explain quantum computing in simple terms"} + ] +) + +print(message.content[0].text) +``` + +## Use System Prompt + +System prompts guide Claude's behavior and style: + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + system="You are a helpful Python programming assistant. Be concise and provide code examples.", + messages=[ + {"role": "user", "content": "How do I read a CSV file?"} + ] +) +``` + +### System Prompt with Caching + +Cache long system prompts to reduce costs: + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + system=[ + { + "type": "text", + "text": "You are an expert on Shakespeare's works. 
Here is context:\n\n" + long_context, + "cache_control": {"type": "ephemeral"} + } + ], + messages=[ + {"role": "user", "content": "Analyze Hamlet's soliloquy"} + ] +) + +# Check cache usage +print(f"Cache hits: {message.usage.cache_read_input_tokens}") +print(f"Cache misses: {message.usage.cache_creation_input_tokens}") +``` + +## Multi-Turn Conversation + +Maintain conversation history: + +```python +conversation = [] + +# Turn 1 +conversation.append({"role": "user", "content": "My name is Alice"}) + +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=conversation +) + +conversation.append({ + "role": "assistant", + "content": message.content +}) + +# Turn 2 +conversation.append({"role": "user", "content": "What's my name?"}) + +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=conversation +) + +print(message.content[0].text) # "Your name is Alice" +``` + +### Conversation Manager Helper + +```python +class ConversationManager: + def __init__(self, client: Anthropic, model: str, system: str = None): + self.client = client + self.model = model + self.system = system + self.history = [] + + def send(self, user_message: str) -> str: + """Send message and update history""" + self.history.append({"role": "user", "content": user_message}) + + message = self.client.messages.create( + model=self.model, + max_tokens=1024, + system=self.system, + messages=self.history + ) + + assistant_message = message.content[0].text + self.history.append({ + "role": "assistant", + "content": message.content + }) + + return assistant_message + +# Usage +conv = ConversationManager( + client, + model="claude-sonnet-4-5-20250929", + system="You are a helpful assistant" +) + +response1 = conv.send("Hello, I'm Bob") +response2 = conv.send("What's my name?") # Remembers "Bob" +``` + +## Control Response Temperature + +Temperature affects randomness (0.0 = deterministic, 1.0 = creative): + +```python +# Deterministic/factual responses +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + temperature=0.0, + messages=[{"role": "user", "content": "What is 2+2?"}] +) + +# Creative/varied responses +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + temperature=1.0, + messages=[{"role": "user", "content": "Write a creative story"}] +) +``` + +## Use Stop Sequences + +Stop generation at specific strings: + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + stop_sequences=["", "\n\n---"], + messages=[ + { + "role": "user", + "content": "List 5 colors, each on a new line. 
End with " + } + ] +) + +# Check if stop sequence was hit +if message.stop_reason == "stop_sequence": + print(f"Stopped at: {message.stop_sequence}") +``` + +## Track Token Usage + +Monitor input and output tokens for cost management: + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "Hello"}] +) + +usage = message.usage +print(f"Input tokens: {usage.input_tokens}") +print(f"Output tokens: {usage.output_tokens}") +print(f"Total: {usage.input_tokens + usage.output_tokens}") + +# With caching +if usage.cache_read_input_tokens: + print(f"Cache hits: {usage.cache_read_input_tokens} tokens") +if usage.cache_creation_input_tokens: + print(f"New cache entries: {usage.cache_creation_input_tokens} tokens") +``` + +## Count Tokens Before Sending + +Estimate costs without creating a message: + +```python +token_count = client.messages.count_tokens( + model="claude-sonnet-4-5-20250929", + messages=[ + {"role": "user", "content": "Very long message..."} + ] +) + +print(f"This request will use {token_count.input_tokens} input tokens") + +# Check if within budget +MAX_TOKENS = 100000 +if token_count.input_tokens > MAX_TOKENS: + print("Message too long, truncating...") +``` + +## Add Request Metadata + +Track users for rate limiting and abuse prevention: + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + metadata={"user_id": "user_12345"}, + messages=[{"role": "user", "content": "Hello"}] +) +``` + +## Handle Max Tokens Limit + +Deal with incomplete responses: + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=100, # Intentionally small + messages=[{"role": "user", "content": "Write a long essay"}] +) + +if message.stop_reason == "max_tokens": + print("Response was truncated. 
Consider increasing max_tokens.") + print(f"Partial response: {message.content[0].text}") + + # Continue generation + continued = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[ + {"role": "user", "content": "Write a long essay"}, + {"role": "assistant", "content": message.content}, + {"role": "user", "content": "Please continue"} + ] + ) +``` + +## Use Different Models + +Choose model based on requirements: + +```python +# Fast and cost-effective +message = client.messages.create( + model="claude-3-5-haiku-20241022", + max_tokens=1024, + messages=[{"role": "user", "content": "Quick question"}] +) + +# Balanced (recommended for most tasks) +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "Standard query"}] +) + +# Maximum capability for complex tasks +message = client.messages.create( + model="claude-opus-4-5-20250929", + max_tokens=2048, + messages=[{"role": "user", "content": "Complex reasoning task"}] +) +``` + +## Async Messaging + +For async applications: + +```python +import asyncio +from anthropic import AsyncAnthropic + +async def send_message(content: str) -> str: + client = AsyncAnthropic() + + message = await client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": content}] + ) + + return message.content[0].text + +# Run async function +response = asyncio.run(send_message("Hello")) + +# Concurrent requests +async def send_multiple(): + client = AsyncAnthropic() + + tasks = [ + client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": f"Question {i}"}] + ) + for i in range(5) + ] + + responses = await asyncio.gather(*tasks) + return [r.content[0].text for r in responses] + +results = asyncio.run(send_multiple()) +``` + +## See Also + +- **[Messages API Reference](../api/messages.md)** - Complete API documentation +- **[Multimodal Input](./multimodal-input.md)** - Working with images and documents +- **[Streaming Responses](./streaming-responses.md)** - Real-time streaming +- **[Error Handling Guide](../guides/error-handling.md)** - Production error patterns diff --git a/.tessl/tiles/tessl/pypi-anthropic/docs/common-tasks/multimodal-input.md b/.tessl/tiles/tessl/pypi-anthropic/docs/common-tasks/multimodal-input.md new file mode 100644 index 0000000..16b426b --- /dev/null +++ b/.tessl/tiles/tessl/pypi-anthropic/docs/common-tasks/multimodal-input.md @@ -0,0 +1,421 @@ +# Multimodal Input Tasks + +Practical patterns for working with images, PDFs, and documents. For complete reference, see **[Multimodal Guide](../guides/multimodal.md)**. 
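+
+## Analyze Image from URL
+
+The patterns below encode local files as base64. For an image that is already hosted at a public URL, the API also accepts a URL source, so no download or encoding step is needed. A minimal sketch (the URL is a placeholder):
+
+```python
+from anthropic import Anthropic
+
+client = Anthropic()
+
+message = client.messages.create(
+    model="claude-sonnet-4-5-20250929",
+    max_tokens=1024,
+    messages=[{
+        "role": "user",
+        "content": [
+            {
+                "type": "image",
+                "source": {
+                    "type": "url",
+                    "url": "https://example.com/photo.jpg"  # placeholder URL
+                }
+            },
+            {"type": "text", "text": "What's in this image?"}
+        ]
+    }]
+)
+
+print(message.content[0].text)
+```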
+ +## Analyze Image from File + +```python +import base64 +from anthropic import Anthropic + +client = Anthropic() + +# Read and encode image +with open("image.jpg", "rb") as f: + image_data = base64.standard_b64encode(f.read()).decode() + +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{ + "role": "user", + "content": [ + { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/jpeg", + "data": image_data + } + }, + {"type": "text", "text": "What's in this image?"} + ] + }] +) + +print(message.content[0].text) +``` + +## Supported Image Formats + +```python +# JPEG +with open("photo.jpg", "rb") as f: + data = base64.standard_b64encode(f.read()).decode() + source = {"type": "base64", "media_type": "image/jpeg", "data": data} + +# PNG +with open("screenshot.png", "rb") as f: + data = base64.standard_b64encode(f.read()).decode() + source = {"type": "base64", "media_type": "image/png", "data": data} + +# GIF +with open("animation.gif", "rb") as f: + data = base64.standard_b64encode(f.read()).decode() + source = {"type": "base64", "media_type": "image/gif", "data": data} + +# WebP +with open("image.webp", "rb") as f: + data = base64.standard_b64encode(f.read()).decode() + source = {"type": "base64", "media_type": "image/webp", "data": data} +``` + +## Analyze Multiple Images + +Compare or analyze multiple images together: + +```python +import base64 + +def load_image(path: str) -> str: + with open(path, "rb") as f: + return base64.standard_b64encode(f.read()).decode() + +img1 = load_image("before.jpg") +img2 = load_image("after.jpg") + +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{ + "role": "user", + "content": [ + {"type": "text", "text": "Before:"}, + {"type": "image", "source": {"type": "base64", "media_type": "image/jpeg", "data": img1}}, + {"type": "text", "text": "After:"}, + {"type": "image", "source": {"type": "base64", "media_type": "image/jpeg", "data": img2}}, + {"type": "text", "text": "What changed between these images?"} + ] + }] +) +``` + +## Optimize Large Images + +Reduce token usage by resizing images: + +```python +from PIL import Image +import io +import base64 + +def optimize_image(image_path: str, max_size=(1024, 1024)) -> str: + """Resize and optimize image for API""" + img = Image.open(image_path) + + # Resize maintaining aspect ratio + img.thumbnail(max_size, Image.Resampling.LANCZOS) + + # Convert to JPEG with compression + buffer = io.BytesIO() + img = img.convert("RGB") # Ensure RGB mode + img.save(buffer, format="JPEG", quality=85, optimize=True) + + return base64.standard_b64encode(buffer.getvalue()).decode() + +# Use optimized image +optimized_data = optimize_image("large_photo.jpg") + +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{ + "role": "user", + "content": [ + {"type": "image", "source": {"type": "base64", "media_type": "image/jpeg", "data": optimized_data}}, + {"type": "text", "text": "Analyze this image"} + ] + }] +) +``` + +## Process PDF Document + +```python +import base64 + +with open("document.pdf", "rb") as f: + pdf_data = base64.standard_b64encode(f.read()).decode() + +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=2048, + messages=[{ + "role": "user", + "content": [ + { + "type": "document", + "source": { + "type": "base64", + "media_type": "application/pdf", + "data": pdf_data + } + }, + {"type": "text", "text": 
"Summarize the key points in this document"} + ] + }] +) +``` + +## PDF from URL + +Process publicly accessible PDFs without downloading: + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=2048, + messages=[{ + "role": "user", + "content": [ + { + "type": "document", + "source": { + "type": "url", + "media_type": "application/pdf", + "url": "https://example.com/document.pdf" + } + }, + {"type": "text", "text": "Extract the main findings"} + ] + }] +) +``` + +## Process Plain Text Document + +For large text content: + +```python +long_text = """Very long text content...""" + +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{ + "role": "user", + "content": [ + { + "type": "document", + "source": { + "type": "text", + "media_type": "text/plain", + "data": long_text + } + }, + {"type": "text", "text": "Analyze the sentiment of this text"} + ] + }] +) +``` + +## Mix Text, Images, and Documents + +Combine multiple content types: + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=2048, + messages=[{ + "role": "user", + "content": [ + {"type": "text", "text": "Here's a presentation:"}, + { + "type": "document", + "source": { + "type": "base64", + "media_type": "application/pdf", + "data": pdf_data + } + }, + {"type": "text", "text": "And the cover image:"}, + { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/jpeg", + "data": image_data + } + }, + {"type": "text", "text": "Review both and provide feedback"} + ] + }] +) +``` + +## Common Image Analysis Tasks + +### Extract Text (OCR) + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{ + "role": "user", + "content": [ + {"type": "image", "source": {"type": "base64", "media_type": "image/jpeg", "data": image_data}}, + {"type": "text", "text": "Extract all text from this image"} + ] + }] +) +``` + +### Identify Objects + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{ + "role": "user", + "content": [ + {"type": "image", "source": {"type": "base64", "media_type": "image/jpeg", "data": image_data}}, + {"type": "text", "text": "List all objects visible in this image"} + ] + }] +) +``` + +### Answer Questions About Image + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{ + "role": "user", + "content": [ + {"type": "image", "source": {"type": "base64", "media_type": "image/jpeg", "data": image_data}}, + {"type": "text", "text": "Is there a person in this image? 
If so, describe them."} + ] + }] +) +``` + +## Common Document Tasks + +### Summarize Document + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=2048, + messages=[{ + "role": "user", + "content": [ + {"type": "document", "source": {"type": "base64", "media_type": "application/pdf", "data": pdf_data}}, + {"type": "text", "text": "Provide a 3-paragraph summary"} + ] + }] +) +``` + +### Q&A on Document + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{ + "role": "user", + "content": [ + {"type": "document", "source": {"type": "base64", "media_type": "application/pdf", "data": pdf_data}}, + {"type": "text", "text": "What methodology was used in this study?"} + ] + }] +) +``` + +### Extract Specific Information + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{ + "role": "user", + "content": [ + {"type": "document", "source": {"type": "base64", "media_type": "application/pdf", "data": pdf_data}}, + {"type": "text", "text": "Extract all dates, names, and monetary amounts mentioned"} + ] + }] +) +``` + +## Use Citations (Beta) + +Get source attribution for document-based responses: + +```python +message = client.beta.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=2048, + citations={"type": "enabled"}, + messages=[{ + "role": "user", + "content": [ + {"type": "document", "source": {"type": "base64", "media_type": "application/pdf", "data": pdf_data}}, + {"type": "text", "text": "Summarize with citations"} + ] + }] +) + +# Check citations +for block in message.content: + if hasattr(block, 'citations'): + for citation in block.citations: + print(f"Cited: {citation.cited_text}") +``` + +## Helper Functions + +### Load Multiple Images + +```python +import base64 +from pathlib import Path + +def load_images(directory: str) -> list[dict]: + """Load all images from directory""" + content = [] + + for path in Path(directory).glob("*.{jpg,jpeg,png}"): + with open(path, "rb") as f: + data = base64.standard_b64encode(f.read()).decode() + + # Determine media type from extension + media_type = { + ".jpg": "image/jpeg", + ".jpeg": "image/jpeg", + ".png": "image/png" + }[path.suffix.lower()] + + content.append({ + "type": "image", + "source": { + "type": "base64", + "media_type": media_type, + "data": data + } + }) + + return content + +# Use it +images = load_images("./photos") +images.append({"type": "text", "text": "Describe all these images"}) + +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=2048, + messages=[{"role": "user", "content": images}] +) +``` + +## See Also + +- **[Multimodal Guide](../guides/multimodal.md)** - Complete multimodal documentation +- **[Messages API Reference](../api/messages.md)** - API details +- **[Beta Citations](../beta/message-features.md#citations)** - Source attribution feature diff --git a/.tessl/tiles/tessl/pypi-anthropic/docs/common-tasks/streaming-responses.md b/.tessl/tiles/tessl/pypi-anthropic/docs/common-tasks/streaming-responses.md new file mode 100644 index 0000000..f0d3c7d --- /dev/null +++ b/.tessl/tiles/tessl/pypi-anthropic/docs/common-tasks/streaming-responses.md @@ -0,0 +1,450 @@ +# Streaming Responses Tasks + +Practical patterns for real-time streaming. For complete reference, see **[Streaming API](../api/streaming.md)** and **[Streaming Guide](../guides/streaming-guide.md)**. 
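+
+## Raw Event Streaming with create(stream=True)
+
+The patterns below use the `stream()` helper, which accumulates the message and exposes conveniences like `text_stream` and `get_final_message()`. If you only need the raw server-sent events, passing `stream=True` to `messages.create()` yields them directly. A minimal sketch of that lower-level path (no accumulation, no final-message helper):
+
+```python
+from anthropic import Anthropic
+
+client = Anthropic()
+
+# Low-level streaming: iterate raw events without the helper's accumulation
+stream = client.messages.create(
+    model="claude-sonnet-4-5-20250929",
+    max_tokens=1024,
+    stream=True,
+    messages=[{"role": "user", "content": "Write a short story"}],
+)
+
+for event in stream:
+    if event.type == "content_block_delta" and event.delta.type == "text_delta":
+        print(event.delta.text, end="", flush=True)
+print()
+```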
+ +## Basic Text Streaming + +```python +from anthropic import Anthropic + +client = Anthropic() + +with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "Write a short story"}] +) as stream: + for text in stream.text_stream: + print(text, end="", flush=True) +print() # New line after stream ends +``` + +**That's it!** The `.text_stream` property automatically filters out non-text events and gives you text deltas ready to print. + +## Get Final Message After Streaming + +```python +with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "What is 2+2?"}] +) as stream: + # Stream the text + for text in stream.text_stream: + print(text, end="", flush=True) + +# Get complete message with metadata +message = stream.get_final_message() +print(f"\n\nToken usage: {message.usage.output_tokens}") +print(f"Stop reason: {message.stop_reason}") +``` + +## Process All Events + +For more control, iterate over all stream events: + +```python +with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "Hello"}] +) as stream: + for event in stream: + if event.type == "message_start": + print(f"[Stream started: {event.message.id}]") + + elif event.type == "content_block_start": + print(f"\n[Content block {event.index} started]") + + elif event.type == "content_block_delta": + if event.delta.type == "text_delta": + print(event.delta.text, end="", flush=True) + + elif event.type == "content_block_stop": + print(f"\n[Content block {event.index} stopped]") + + elif event.type == "message_delta": + print(f"\n[Stop reason: {event.delta.stop_reason}]") + print(f"[Tokens used: {event.usage.output_tokens}]") + + elif event.type == "message_stop": + print("\n[Stream completed]") +``` + +## Stream with Tool Use + +Detect when Claude wants to use tools: + +```python +tools = [{ + "name": "get_weather", + "description": "Get weather", + "input_schema": { + "type": "object", + "properties": { + "location": {"type": "string"} + }, + "required": ["location"] + } +}] + +with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + tools=tools, + messages=[{"role": "user", "content": "What's the weather in SF?"}] +) as stream: + for event in stream: + if event.type == "content_block_start": + if event.content_block.type == "tool_use": + print(f"\n[Tool call: {event.content_block.name}]") + + elif event.type == "content_block_delta": + if event.delta.type == "text_delta": + print(event.delta.text, end="", flush=True) + elif event.delta.type == "input_json_delta": + print(event.delta.partial_json, end="") + +message = stream.get_final_message() + +# Process tool calls +for block in message.content: + if block.type == "tool_use": + print(f"\nTool: {block.name}") + print(f"Input: {block.input}") +``` + +## Track Token Usage During Streaming + +Monitor token usage in real-time: + +```python +with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "Write a long essay"}] +) as stream: + current_tokens = 0 + + for event in stream: + if event.type == "message_delta": + current_tokens = event.usage.output_tokens + print(f"\r[Tokens: {current_tokens}]", end="") + + elif event.type == "content_block_delta": + if event.delta.type == "text_delta": + print(event.delta.text, end="", flush=True) +``` + +## Async Streaming + +For async 
applications: + +```python +import asyncio +from anthropic import AsyncAnthropic + +async def stream_response(): + client = AsyncAnthropic() + + async with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "Write a haiku"}] + ) as stream: + async for text in stream.text_stream: + print(text, end="", flush=True) + print() + +asyncio.run(stream_response()) +``` + +## Concurrent Async Streams + +Run multiple streams in parallel: + +```python +import asyncio +from anthropic import AsyncAnthropic + +async def stream_question(client: AsyncAnthropic, question: str) -> str: + """Stream a question and return final text""" + async with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": question}] + ) as stream: + # Consume stream + async for _ in stream: + pass + return stream.get_final_text() + +async def main(): + client = AsyncAnthropic() + + questions = [ + "What is Python?", + "What is JavaScript?", + "What is Rust?" + ] + + # Run all streams concurrently + results = await asyncio.gather(*[ + stream_question(client, q) for q in questions + ]) + + for question, answer in zip(questions, results): + print(f"\nQ: {question}") + print(f"A: {answer}") + +asyncio.run(main()) +``` + +## Buffered Streaming + +Buffer output for smoother display: + +```python +import time + +with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "Write a story"}] +) as stream: + buffer = "" + + for text in stream.text_stream: + buffer += text + + # Flush every 10 characters or at punctuation + if len(buffer) >= 10 or text in ".!?\n": + print(buffer, end="", flush=True) + buffer = "" + time.sleep(0.02) # Smooth animation + + # Flush remaining + if buffer: + print(buffer, end="", flush=True) +``` + +## Current Message Snapshot + +Get partial message during streaming: + +```python +with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "Count to 10"}] +) as stream: + for event in stream: + # Access current accumulated message + snapshot = stream.current_message_snapshot + + if snapshot.content: + current_text = snapshot.content[0].text if snapshot.content[0].type == "text" else "" + print(f"\rCurrent length: {len(current_text)}", end="") +``` + +## Error Handling in Streams + +```python +from anthropic import APIError, APITimeoutError + +try: + with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "Hello"}] + ) as stream: + for text in stream.text_stream: + print(text, end="", flush=True) +except APITimeoutError: + print("\n[Stream timed out]") +except APIError as e: + print(f"\n[Stream error: {e.message}]") +``` + +## Multi-Turn Conversation with Streaming + +```python +conversation = [] + +def stream_turn(user_message: str): + """Stream a conversation turn""" + conversation.append({"role": "user", "content": user_message}) + + print(f"\nUser: {user_message}") + print("Claude: ", end="") + + with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=conversation + ) as stream: + for text in stream.text_stream: + print(text, end="", flush=True) + + message = stream.get_final_message() + conversation.append({ + "role": "assistant", + "content": message.content + }) + print() # New line + +# Conversation +stream_turn("Hi, I'm Alice") 
+stream_turn("What's my name?") # Claude remembers "Alice" +``` + +## Stream with System Prompt + +```python +with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + system="You are a helpful Python expert. Be concise.", + messages=[{"role": "user", "content": "Explain list comprehensions"}] +) as stream: + for text in stream.text_stream: + print(text, end="", flush=True) +``` + +## Stream with Temperature + +```python +# Creative streaming +with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + temperature=0.8, + messages=[{"role": "user", "content": "Write a creative story"}] +) as stream: + for text in stream.text_stream: + print(text, end="", flush=True) +``` + +## Stream Beta Features + +Stream with extended thinking (beta): + +```python +with client.beta.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=2048, + thinking={"type": "enabled", "budget_tokens": 1000}, + messages=[{"role": "user", "content": "Solve this complex problem: ..."}] +) as stream: + for event in stream: + if event.type == "content_block_delta": + if event.delta.type == "thinking_delta": + print(f"[Thinking: {event.delta.thinking}]", end="") + elif event.delta.type == "text_delta": + print(event.delta.text, end="", flush=True) +``` + +## Access Raw HTTP Response + +```python +with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "Hello"}] +) as stream: + # Access underlying HTTP response + request_id = stream.response.headers.get("request-id") + print(f"[Request ID: {request_id}]") + + for text in stream.text_stream: + print(text, end="", flush=True) +``` + +## Interrupt Streaming + +Handle keyboard interrupts gracefully: + +```python +import signal +import sys + +def signal_handler(sig, frame): + print("\n[Streaming interrupted]") + sys.exit(0) + +signal.signal(signal.SIGINT, signal_handler) + +try: + with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "Write a very long story"}] + ) as stream: + for text in stream.text_stream: + print(text, end="", flush=True) +except KeyboardInterrupt: + print("\n[Streaming stopped]") +``` + +## Manual Stream Iteration + +For advanced use cases without context manager: + +```python +stream_manager = client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "Hello"}] +) + +stream = stream_manager.__enter__() + +try: + for event in stream: + if event.type == "content_block_delta": + if event.delta.type == "text_delta": + print(event.delta.text, end="", flush=True) + + message = stream.get_final_message() +finally: + stream_manager.__exit__(None, None, None) +``` + +## Streaming Best Practices + +### 1. Always Use Context Managers + +```python +# Good - automatic cleanup +with client.messages.stream(...) as stream: + for text in stream.text_stream: + print(text, end="") + +# Bad - manual cleanup required +stream = client.messages.stream(...) +# ... easy to forget cleanup +``` + +### 2. Set Appropriate Timeouts + +```python +import httpx + +client = Anthropic( + timeout=httpx.Timeout(120.0) # 2 minutes for long streams +) +``` + +### 3. Handle Interruptions + +Always handle potential interruptions gracefully for better UX. + +### 4. Use Async for High Concurrency + +When handling many concurrent streams, use `AsyncAnthropic` for better performance. 
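
As a rough sketch of that advice (assuming an `AsyncAnthropic` client and an arbitrary cap of five in-flight requests), a semaphore keeps a large fan-out of streams from exceeding your rate limits:

```python
import asyncio
from anthropic import AsyncAnthropic

async def bounded_stream(client: AsyncAnthropic, sem: asyncio.Semaphore, prompt: str) -> str:
    """Stream one prompt while holding a concurrency slot."""
    async with sem:  # wait here if too many streams are already in flight
        async with client.messages.stream(
            model="claude-sonnet-4-5-20250929",
            max_tokens=1024,
            messages=[{"role": "user", "content": prompt}],
        ) as stream:
            chunks = [text async for text in stream.text_stream]
            return "".join(chunks)

async def main():
    client = AsyncAnthropic()
    sem = asyncio.Semaphore(5)  # arbitrary concurrency cap
    prompts = [f"Give a one-sentence fact about the number {i}" for i in range(20)]
    results = await asyncio.gather(*(bounded_stream(client, sem, p) for p in prompts))
    print(f"Collected {len(results)} responses")

asyncio.run(main())
```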
+ +## See Also + +- **[Streaming API Reference](../api/streaming.md)** - Complete event types and architecture +- **[Streaming Guide](../guides/streaming-guide.md)** - Advanced patterns +- **[Messages API](../api/messages.md)** - Core message creation diff --git a/.tessl/tiles/tessl/pypi-anthropic/docs/common-tasks/tool-integration.md b/.tessl/tiles/tessl/pypi-anthropic/docs/common-tasks/tool-integration.md new file mode 100644 index 0000000..08637d1 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-anthropic/docs/common-tasks/tool-integration.md @@ -0,0 +1,484 @@ +# Tool Integration Tasks + +Practical patterns for function calling and agentic workflows. For complete reference, see **[Tools API](../api/tools.md)** and **[Tool Usage Guide](../guides/tool-usage.md)**. + +## Quick Start with Tool Decorator + +```python +from anthropic import Anthropic, beta_tool + +client = Anthropic() + +# Define tool with decorator +@beta_tool +def get_weather(location: str, unit: str = "fahrenheit") -> dict: + """ + Get weather for a location. + + Args: + location: City and state, e.g. "San Francisco, CA" + unit: Temperature unit - "celsius" or "fahrenheit" + """ + # Your implementation here + return { + "location": location, + "temperature": 72, + "unit": unit, + "condition": "sunny" + } + +# Auto-execute tools with tool_runner +for message in client.beta.messages.tool_runner( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + tools=[get_weather], + messages=[{"role": "user", "content": "What's the weather in NYC?"}] +): + if message.stop_reason == "end_turn": + print(message.content[0].text) +``` + +**That's it!** The `tool_runner` automatically: +- Sends the request to Claude +- Detects when Claude wants to use a tool +- Executes the tool function +- Sends results back to Claude +- Continues until Claude responds with text + +## Manual Tool Handling + +For more control over the execution flow: + +```python +# 1. Define tool manually +tools = [{ + "name": "get_weather", + "description": "Get weather for a location", + "input_schema": { + "type": "object", + "properties": { + "location": {"type": "string", "description": "City and state"} + }, + "required": ["location"] + } +}] + +# 2. Send initial request +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + tools=tools, + messages=[{"role": "user", "content": "What's the weather in SF?"}] +) + +# 3. Check if Claude wants to use a tool +if message.stop_reason == "tool_use": + # Extract tool use + tool_use = next(block for block in message.content if block.type == "tool_use") + print(f"Claude wants to use: {tool_use.name}") + print(f"With params: {tool_use.input}") + + # 4. Execute the tool + result = get_weather(location=tool_use.input["location"]) + + # 5. 
Send result back to Claude + final_message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + tools=tools, + messages=[ + {"role": "user", "content": "What's the weather in SF?"}, + {"role": "assistant", "content": message.content}, + { + "role": "user", + "content": [{ + "type": "tool_result", + "tool_use_id": tool_use.id, + "content": str(result) + }] + } + ] + ) + + print(final_message.content[0].text) +``` + +## Multiple Tools + +Claude can choose from multiple tools: + +```python +@beta_tool +def get_weather(location: str) -> dict: + """Get current weather for a location""" + return {"temp": 72, "condition": "sunny"} + +@beta_tool +def search_database(query: str, limit: int = 10) -> list: + """Search database for items matching query""" + return [{"id": 1, "name": "Item 1"}, {"id": 2, "name": "Item 2"}] + +@beta_tool +def send_email(to: str, subject: str, body: str) -> dict: + """Send an email""" + return {"status": "sent", "message_id": "abc123"} + +# Claude picks the right tool(s) +for message in client.beta.messages.tool_runner( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + tools=[get_weather, search_database, send_email], + messages=[{"role": "user", "content": "Check the weather in NYC and email it to bob@example.com"}] +): + if message.stop_reason == "end_turn": + print(message.content[0].text) +``` + +## Async Tools + +For I/O-bound operations: + +```python +import httpx +from anthropic import AsyncAnthropic, beta_async_tool + +client = AsyncAnthropic() + +@beta_async_tool +async def fetch_url(url: str) -> str: + """Fetch content from a URL""" + async with httpx.AsyncClient() as http_client: + response = await http_client.get(url) + return response.text + +# Use with async tool_runner +async for message in client.beta.messages.tool_runner( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + tools=[fetch_url], + messages=[{"role": "user", "content": "Fetch https://example.com and summarize it"}] +): + if message.stop_reason == "end_turn": + print(message.content[0].text) +``` + +## Control Tool Selection + +### Force Tool Use + +Require Claude to use at least one tool: + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + tools=[...], + tool_choice={"type": "any"}, # Force any tool + messages=[{"role": "user", "content": "Hello"}] +) +``` + +### Force Specific Tool + +Require Claude to use a specific tool: + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + tools=[...], + tool_choice={"type": "tool", "name": "get_weather"}, # Force specific tool + messages=[{"role": "user", "content": "Hello"}] +) +``` + +### Disable Tools for a Turn + +Temporarily disable tool use: + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + tools=[...], + tool_choice={"type": "none"}, # No tools this turn + messages=[...] 
+) +``` + +### Disable Parallel Tool Use + +Force sequential tool calls: + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + tools=[...], + tool_choice={ + "type": "auto", + "disable_parallel_tool_use": True + }, + messages=[{"role": "user", "content": "Do task A and then task B"}] +) +``` + +## Error Handling in Tools + +Report tool execution failures to Claude: + +```python +@beta_tool +def divide(a: float, b: float) -> float: + """Divide two numbers""" + if b == 0: + raise ValueError("Cannot divide by zero") + return a / b + +# Manual handling with error reporting +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + tools=[divide.to_param()], + messages=[{"role": "user", "content": "What is 10 divided by 0?"}] +) + +tool_use = next(block for block in message.content if block.type == "tool_use") + +# Execute and catch errors +try: + result = divide(**tool_use.input) + tool_result = str(result) + is_error = False +except Exception as e: + tool_result = str(e) + is_error = True + +# Send error back to Claude +final_message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + tools=[divide.to_param()], + messages=[ + {"role": "user", "content": "What is 10 divided by 0?"}, + {"role": "assistant", "content": message.content}, + { + "role": "user", + "content": [{ + "type": "tool_result", + "tool_use_id": tool_use.id, + "content": tool_result, + "is_error": is_error # Tell Claude this is an error + }] + } + ] +) + +print(final_message.content[0].text) # Claude handles the error gracefully +``` + +## Stateful Tools + +Tools that maintain state: + +```python +class Calculator: + def __init__(self): + self.memory = 0 + + @beta_tool + def calculate(self, expression: str) -> float: + """Evaluate a mathematical expression and store result""" + result = eval(expression) # Use safe evaluation in production + self.memory = result + return result + + @beta_tool + def recall(self) -> float: + """Recall the last calculation result""" + return self.memory + + @beta_tool + def clear(self) -> str: + """Clear calculator memory""" + self.memory = 0 + return "Memory cleared" + +# Use stateful tools +calc = Calculator() + +for message in client.beta.messages.tool_runner( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + tools=[calc.calculate, calc.recall, calc.clear], + messages=[{"role": "user", "content": "Calculate 5 * 8, then add 10 to that result"}] +): + if message.stop_reason == "end_turn": + print(message.content[0].text) + print(f"Final memory: {calc.memory}") +``` + +## Validate Tool Inputs + +```python +@beta_tool +def set_temperature(degrees: float, unit: str = "celsius") -> dict: + """Set thermostat temperature""" + # Validate unit + if unit not in ["celsius", "fahrenheit"]: + raise ValueError(f"Invalid unit: {unit}. 
Must be 'celsius' or 'fahrenheit'") + + # Validate range + if degrees < -50 or degrees > 50: + raise ValueError(f"Temperature {degrees} out of safe range (-50 to 50)") + + # Set temperature + return {"status": "success", "temperature": degrees, "unit": unit} +``` + +## Return Structured Data + +```python +from typing import TypedDict + +class UserInfo(TypedDict): + id: str + name: str + email: str + created_at: str + +@beta_tool +def get_user_info(user_id: str) -> UserInfo: + """Get user information by ID""" + # Fetch from database + return { + "id": user_id, + "name": "Alice Smith", + "email": "alice@example.com", + "created_at": "2024-01-15" + } + +# Claude can work with structured data +for message in client.beta.messages.tool_runner( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + tools=[get_user_info], + messages=[{"role": "user", "content": "Get info for user user_123 and send them a welcome email"}] +): + if message.stop_reason == "end_turn": + print(message.content[0].text) +``` + +## Complex Tool Example + +Database query tool with validation: + +```python +@beta_tool +def query_database( + table: str, + filters: dict, + limit: int = 10, + sort_by: str | None = None +) -> list[dict]: + """ + Query database with filters. + + Args: + table: Table name to query + filters: Key-value pairs for filtering (e.g., {"status": "active"}) + limit: Maximum number of results (1-100) + sort_by: Optional field to sort by + """ + # Validate inputs + valid_tables = ["users", "orders", "products"] + if table not in valid_tables: + raise ValueError(f"Invalid table. Must be one of: {valid_tables}") + + if not 1 <= limit <= 100: + raise ValueError("Limit must be between 1 and 100") + + # Execute query (pseudocode) + results = db.query(table).filter(**filters).limit(limit) + if sort_by: + results = results.sort(sort_by) + + return results.all() +``` + +## Tool Usage Best Practices + +### 1. Clear Descriptions + +Write clear docstrings - Claude uses these to decide when to call the tool: + +```python +@beta_tool +def search_products( + query: str, + category: str | None = None, + min_price: float | None = None, + max_price: float | None = None, + in_stock_only: bool = True +) -> list[dict]: + """ + Search product catalog for matching items. + + Use this tool when the user wants to find or browse products. + + Args: + query: Search keywords or product name + category: Optional category filter (e.g., "electronics", "books", "clothing") + min_price: Minimum price in USD + max_price: Maximum price in USD + in_stock_only: Only show products currently in stock + """ + ... +``` + +### 2. Use Type Hints + +Type hints improve schema generation: + +```python +from typing import Literal + +@beta_tool +def book_appointment( + date: str, # Use specific format hints in docstring + time: str, + service: Literal["haircut", "coloring", "styling"] +) -> dict: + """ + Book an appointment. + + Args: + date: Date in YYYY-MM-DD format + time: Time in HH:MM format (24-hour) + service: Type of service + """ + ... +``` + +### 3. 
Return Useful Data + +Return structured data that Claude can work with: + +```python +@beta_tool +def get_order_status(order_id: str) -> dict: + """Get status of an order""" + return { + "order_id": order_id, + "status": "shipped", + "tracking_number": "1Z999AA10123456784", + "estimated_delivery": "2024-01-20", + "items_count": 3 + } +``` + +## See Also + +- **[Tools API Reference](../api/tools.md)** - Complete API documentation +- **[Tool Usage Guide](../guides/tool-usage.md)** - Advanced patterns and examples +- **[Messages API](../api/messages.md)** - Core message creation diff --git a/.tessl/tiles/tessl/pypi-anthropic/docs/guides/batch-processing.md b/.tessl/tiles/tessl/pypi-anthropic/docs/guides/batch-processing.md new file mode 100644 index 0000000..32996ca --- /dev/null +++ b/.tessl/tiles/tessl/pypi-anthropic/docs/guides/batch-processing.md @@ -0,0 +1,288 @@ +# Batch Processing Guide + +Process thousands of messages asynchronously with 50% cost reduction using the Message Batches API. + +## Why Use Batches? + +- **Cost Reduction**: 50% lower cost compared to standard API +- **High Throughput**: Process thousands of requests asynchronously +- **No Rate Limits**: Batch requests don't count against rate limits + +## Basic Batch + +```python +batch = client.messages.batches.create( + requests=[ + { + "custom_id": "request-1", + "params": { + "model": "claude-sonnet-4-5-20250929", + "max_tokens": 1024, + "messages": [{"role": "user", "content": "What is AI?"}] + } + }, + { + "custom_id": "request-2", + "params": { + "model": "claude-sonnet-4-5-20250929", + "max_tokens": 1024, + "messages": [{"role": "user", "content": "What is ML?"}] + } + } + ] +) + +print(f"Batch ID: {batch.id}") +``` + +## Check Status + +```python +batch = client.messages.batches.retrieve("batch_abc123") + +print(f"Status: {batch.processing_status}") +print(f"Succeeded: {batch.request_counts.succeeded}/{batch.request_counts.processing + batch.request_counts.succeeded}") +print(f"Errored: {batch.request_counts.errored}") +``` + +## Poll Until Complete + +```python +import time + +batch_id = "batch_abc123" + +while True: + batch = client.messages.batches.retrieve(batch_id) + + if batch.processing_status == "ended": + break + + print(f"Processing: {batch.request_counts.processing} requests remaining") + time.sleep(60) # Check every minute + +print("Batch complete!") +``` + +## Get Results + +```python +results = client.messages.batches.results(batch_id) + +for response in results: + if response.result.type == "succeeded": + print(f"{response.custom_id}: {response.result.message.content[0].text}") + elif response.result.type == "errored": + print(f"{response.custom_id}: Error - {response.result.error.message}") +``` + +## Process Results + +```python +results_by_id = {} + +for response in client.messages.batches.results(batch_id): + if response.result.type == "succeeded": + results_by_id[response.custom_id] = response.result.message.content[0].text + elif response.result.type == "errored": + results_by_id[response.custom_id] = f"Error: {response.result.error.message}" + +# Access results by custom_id +print(results_by_id["request-1"]) +``` + +## Large-Scale Batch + +```python +# Generate batch requests +requests = [] +for i, question in enumerate(questions): # thousands of questions + requests.append({ + "custom_id": f"question-{i}", + "params": { + "model": "claude-sonnet-4-5-20250929", + "max_tokens": 1024, + "messages": [{"role": "user", "content": question}] + } + }) + +# Create batch +batch = 
client.messages.batches.create(requests=requests) + +# Poll for completion +while True: + batch = client.messages.batches.retrieve(batch.id) + if batch.processing_status == "ended": + break + time.sleep(60) + +# Process results +for response in client.messages.batches.results(batch.id): + save_result(response.custom_id, response.result) +``` + +## Cancel Batch + +```python +batch = client.messages.batches.cancel("batch_abc123") +print(f"Status: {batch.processing_status}") # "canceling" +``` + +## Delete Batch + +```python +# Must be ended first +deleted = client.messages.batches.delete("batch_abc123") +print(f"Deleted: {deleted.id}") +``` + +## List Batches + +```python +# List all batches +for batch in client.messages.batches.list(limit=10): + print(f"{batch.id}: {batch.processing_status}") + +# Paginate manually +page = client.messages.batches.list(limit=10) +for batch in page.data: + print(batch.id) + +if page.has_next_page(): + next_page = page.get_next_page() +``` + +## Batch with Tools + +```python +batch = client.messages.batches.create( + requests=[ + { + "custom_id": "weather-sf", + "params": { + "model": "claude-sonnet-4-5-20250929", + "max_tokens": 1024, + "tools": [{ + "name": "get_weather", + "description": "Get weather", + "input_schema": { + "type": "object", + "properties": {"location": {"type": "string"}}, + "required": ["location"] + } + }], + "messages": [{"role": "user", "content": "What's the weather in SF?"}] + } + } + ] +) +``` + +## Batch with Streaming Context + +While batches don't stream, you can prepare stream-like prompts: + +```python +requests = [] +for doc in documents: + requests.append({ + "custom_id": f"doc-{doc['id']}", + "params": { + "model": "claude-sonnet-4-5-20250929", + "max_tokens": 2048, + "messages": [{ + "role": "user", + "content": [ + {"type": "document", "source": {"type": "base64", "media_type": "application/pdf", "data": doc['data']}}, + {"type": "text", "text": "Summarize this document"} + ] + }] + } + }) +``` + +## Async Batch Operations + +```python +import asyncio +from anthropic import AsyncAnthropic + +async def main(): + client = AsyncAnthropic() + + # Create batch + batch = await client.messages.batches.create(requests=[...]) + + # Poll until complete + while True: + batch = await client.messages.batches.retrieve(batch.id) + if batch.processing_status == "ended": + break + await asyncio.sleep(60) + + # Get results + results = [] + async for response in client.messages.batches.results(batch.id): + results.append(response) + + return results + +results = asyncio.run(main()) +``` + +## Best Practices + +### 1. Use Meaningful Custom IDs + +```python +requests = [{ + "custom_id": f"user-{user_id}-question-{question_id}", + "params": {...} +}] +``` + +### 2. Handle Errors Gracefully + +```python +for response in client.messages.batches.results(batch_id): + if response.result.type == "succeeded": + process_success(response) + elif response.result.type == "errored": + log_error(response.custom_id, response.result.error) + retry_if_needed(response.custom_id) +``` + +### 3. Batch Size Considerations + +- Batches can contain up to 10,000 requests +- Consider splitting very large workloads into multiple batches + +### 4. 
Monitor Progress + +```python +def monitor_batch(batch_id): + while True: + batch = client.messages.batches.retrieve(batch_id) + + total = sum([ + batch.request_counts.processing, + batch.request_counts.succeeded, + batch.request_counts.errored + ]) + + completed = batch.request_counts.succeeded + batch.request_counts.errored + progress = (completed / total * 100) if total > 0 else 0 + + print(f"Progress: {progress:.1f}% ({completed}/{total})") + + if batch.processing_status == "ended": + break + + time.sleep(60) +``` + +## See Also + +- [Batches API](../api/batches.md) - Complete API reference +- [Messages API](../api/messages.md) - Message creation +- [Beta Features](../beta/index.md) - Beta message batches diff --git a/.tessl/tiles/tessl/pypi-anthropic/docs/guides/error-handling.md b/.tessl/tiles/tessl/pypi-anthropic/docs/guides/error-handling.md new file mode 100644 index 0000000..03f65c0 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-anthropic/docs/guides/error-handling.md @@ -0,0 +1,324 @@ +# Error Handling Guide + +Robust error handling patterns for production applications using the Anthropic Python SDK. + +## Basic Error Handling + +```python +from anthropic import APIError + +try: + message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "Hello"}] + ) +except APIError as e: + print(f"Error: {e.message}") +``` + +## Specific Error Types + +```python +from anthropic import ( + RateLimitError, + AuthenticationError, + BadRequestError, + NotFoundError, + InternalServerError, +) + +try: + message = client.messages.create(...) +except RateLimitError as e: + retry_after = e.response.headers.get("retry-after") + print(f"Rate limited. Retry after {retry_after}s") +except AuthenticationError: + print("Invalid API key") +except BadRequestError as e: + print(f"Invalid request: {e.message}") +except NotFoundError: + print("Resource not found") +except InternalServerError: + print("Server error, please retry") +``` + +## Retry with Exponential Backoff + +```python +import time +from anthropic import RateLimitError, InternalServerError + +def create_message_with_retry(max_retries=3, base_delay=1.0): + """Create message with exponential backoff retry.""" + for attempt in range(max_retries): + try: + return client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "Hello"}] + ) + except (RateLimitError, InternalServerError) as e: + if attempt == max_retries - 1: + raise + + wait_time = base_delay * (2 ** attempt) + print(f"Retry {attempt + 1}/{max_retries} after {wait_time}s") + time.sleep(wait_time) + +message = create_message_with_retry() +``` + +## Advanced Retry Pattern + +```python +import random +import time +from anthropic import APIError, RateLimitError + +def exponential_backoff_retry( + func, + max_retries=5, + base_delay=1.0, + max_delay=60.0, + jitter=True +): + """ + Execute function with exponential backoff retry. 
+ + Args: + func: Function to execute + max_retries: Maximum retry attempts + base_delay: Initial delay in seconds + max_delay: Maximum delay in seconds + jitter: Add random jitter to avoid thundering herd + """ + for attempt in range(max_retries): + try: + return func() + except RateLimitError as e: + if attempt == max_retries - 1: + raise + + # Use retry-after header if available + retry_after = e.response.headers.get("retry-after") + if retry_after: + wait_time = float(retry_after) + else: + wait_time = min(base_delay * (2 ** attempt), max_delay) + if jitter: + wait_time *= (0.5 + random.random()) + + print(f"Rate limited. Waiting {wait_time:.1f}s (attempt {attempt + 1}/{max_retries})") + time.sleep(wait_time) + except APIError as e: + if attempt == max_retries - 1: + raise + print(f"API error: {e.message}. Retrying...") + time.sleep(base_delay) + +# Usage +message = exponential_backoff_retry( + lambda: client.messages.create(...) +) +``` + +## Async Error Handling + +```python +import asyncio +from anthropic import AsyncAnthropic, APIError + +async def create_message_safe(): + client = AsyncAnthropic() + + try: + message = await client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "Hello"}] + ) + return message + except APIError as e: + print(f"Error: {e.message}") + return None + +result = asyncio.run(create_message_safe()) +``` + +## Circuit Breaker Pattern + +```python +import time +from anthropic import APIError + +class CircuitBreaker: + """Circuit breaker to prevent cascading failures.""" + + def __init__(self, failure_threshold=5, timeout=60): + self.failure_count = 0 + self.failure_threshold = failure_threshold + self.timeout = timeout + self.last_failure_time = None + self.is_open = False + + def call(self, func, *args, **kwargs): + """Execute function with circuit breaker protection.""" + if self.is_open: + # Check if timeout has passed + if time.time() - self.last_failure_time < self.timeout: + raise Exception("Circuit breaker is open") + else: + # Try to close circuit + self.is_open = False + self.failure_count = 0 + + try: + result = func(*args, **kwargs) + self.failure_count = 0 # Reset on success + return result + except APIError as e: + self.failure_count += 1 + self.last_failure_time = time.time() + + if self.failure_count >= self.failure_threshold: + self.is_open = True + print(f"Circuit breaker opened after {self.failure_count} failures") + + raise + +# Usage +circuit_breaker = CircuitBreaker() + +try: + message = circuit_breaker.call( + client.messages.create, + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "Hello"}] + ) +except Exception as e: + print(f"Circuit breaker error: {e}") +``` + +## Logging Errors + +```python +import logging +from anthropic import APIError, APIStatusError + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +try: + message = client.messages.create(...) 
+except APIStatusError as e: + logger.error( + "API request failed", + extra={ + "request_id": e.request_id, + "status_code": e.status_code, + "error_message": e.message, + "error_type": e.body.get("error", {}).get("type") if e.body else None, + } + ) +except APIError as e: + logger.error(f"API error: {e.message}") +``` + +## Graceful Degradation + +```python +from anthropic import APIError + +def get_response_with_fallback(user_message): + """Try primary model, fall back to simpler model on error.""" + models = [ + "claude-sonnet-4-5-20250929", + "claude-3-5-haiku-20241022", + ] + + for model in models: + try: + message = client.messages.create( + model=model, + max_tokens=1024, + messages=[{"role": "user", "content": user_message}] + ) + return message.content[0].text + except APIError as e: + print(f"Failed with {model}: {e.message}") + continue + + return "Service temporarily unavailable" + +response = get_response_with_fallback("What is AI?") +``` + +## Validate Inputs Before API Call + +```python +from anthropic import BadRequestError + +def validate_and_create_message(messages): + """Validate inputs before making API call.""" + # Validate message structure + if not messages: + raise ValueError("Messages list cannot be empty") + + for msg in messages: + if "role" not in msg or "content" not in msg: + raise ValueError("Each message must have 'role' and 'content'") + + if msg["role"] not in ["user", "assistant"]: + raise ValueError(f"Invalid role: {msg['role']}") + + # Make API call + try: + return client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=messages + ) + except BadRequestError as e: + print(f"API validation failed: {e.message}") + raise +``` + +## Best Practices + +### 1. Always Handle Exceptions + +Never let exceptions go unhandled in production. + +### 2. Use Specific Exception Types + +Catch specific exceptions for targeted handling. + +### 3. Implement Retry Logic + +Always retry transient errors (rate limits, server errors). + +### 4. Log with Context + +Include request IDs and relevant context in logs. + +### 5. Set Reasonable Timeouts + +```python +import httpx + +client = Anthropic( + timeout=httpx.Timeout(60.0), + max_retries=3 +) +``` + +### 6. Monitor Error Rates + +Track error rates to detect issues early. + +## See Also + +- [Error Reference](../reference/errors.md) - Complete exception hierarchy +- [Client Configuration](../reference/client-config.md) - Timeout and retry configuration diff --git a/.tessl/tiles/tessl/pypi-anthropic/docs/guides/getting-started.md b/.tessl/tiles/tessl/pypi-anthropic/docs/guides/getting-started.md new file mode 100644 index 0000000..76cc0e2 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-anthropic/docs/guides/getting-started.md @@ -0,0 +1,229 @@ +# Getting Started Guide + +Step-by-step guide to using the Anthropic Python SDK. 
+ +## Installation + +```bash +pip install anthropic +``` + +Optional extras: +```bash +pip install anthropic[bedrock] # AWS Bedrock +pip install anthropic[vertex] # Google Vertex AI +pip install anthropic[aiohttp] # Alternative async HTTP +``` + +## Authentication + +Set your API key as an environment variable: + +```bash +export ANTHROPIC_API_KEY='your-api-key' +``` + +Or pass it explicitly: + +```python +from anthropic import Anthropic + +client = Anthropic(api_key="your-api-key") +``` + +## Basic Message + +```python +from anthropic import Anthropic + +client = Anthropic() + +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[ + {"role": "user", "content": "Hello, Claude!"} + ] +) + +print(message.content[0].text) +``` + +## System Prompts + +Configure Claude's behavior with system prompts: + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + system="You are a helpful AI assistant specializing in Python programming.", + messages=[ + {"role": "user", "content": "How do I read a file?"} + ] +) +``` + +## Multi-Turn Conversations + +Maintain conversation history: + +```python +messages = [ + {"role": "user", "content": "My name is Alice."}, + {"role": "assistant", "content": "Hello Alice! Nice to meet you."}, + {"role": "user", "content": "What's my name?"} +] + +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=messages +) + +print(message.content[0].text) # "Your name is Alice." +``` + +## Streaming Responses + +Stream responses for real-time feedback: + +```python +with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[ + {"role": "user", "content": "Write a short story"} + ] +) as stream: + for text in stream.text_stream: + print(text, end="", flush=True) +print() +``` + +## Working with Images + +Send images to Claude: + +```python +import base64 + +with open("image.jpg", "rb") as f: + image_data = base64.standard_b64encode(f.read()).decode() + +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{ + "role": "user", + "content": [ + { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/jpeg", + "data": image_data + } + }, + {"type": "text", "text": "What's in this image?"} + ] + }] +) +``` + +## Error Handling + +Always handle potential errors: + +```python +from anthropic import APIError, RateLimitError + +try: + message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "Hello"}] + ) +except RateLimitError as e: + print(f"Rate limited. Retry after {e.response.headers.get('retry-after')}s") +except APIError as e: + print(f"API error: {e.message}") +``` + +## Async Usage + +For async applications: + +```python +import asyncio +from anthropic import AsyncAnthropic + +async def main(): + client = AsyncAnthropic() + + message = await client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "Hello"}] + ) + + print(message.content[0].text) + +asyncio.run(main()) +``` + +## Best Practices + +### 1. Use Context Managers + +```python +with Anthropic() as client: + message = client.messages.create(...) +# Client automatically closed +``` + +### 2. Handle Errors Gracefully + +```python +try: + message = client.messages.create(...) +except APIError as e: + # Handle error + ... 
+``` + +### 3. Use Appropriate Models + +- `claude-sonnet-4-5-20250929` - Balanced intelligence and speed +- `claude-opus-4-5-20250929` - Maximum capability +- `claude-3-5-haiku-20241022` - Fast and cost-effective + +### 4. Set Reasonable Timeouts + +```python +import httpx + +client = Anthropic( + timeout=httpx.Timeout(60.0) +) +``` + +### 5. Track Token Usage + +```python +message = client.messages.create(...) +print(f"Input tokens: {message.usage.input_tokens}") +print(f"Output tokens: {message.usage.output_tokens}") +``` + +## Next Steps + +- [Multimodal Content](./multimodal.md) - Images, documents, PDFs +- [Tool Usage](./tool-usage.md) - Function calling +- [Streaming Guide](./streaming-guide.md) - Advanced streaming +- [Error Handling](./error-handling.md) - Robust error management + +## See Also + +- [Messages API](../api/messages.md) - Complete API reference +- [Client Configuration](../reference/client-config.md) - Advanced configuration diff --git a/.tessl/tiles/tessl/pypi-anthropic/docs/guides/multimodal.md b/.tessl/tiles/tessl/pypi-anthropic/docs/guides/multimodal.md new file mode 100644 index 0000000..7d6f936 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-anthropic/docs/guides/multimodal.md @@ -0,0 +1,300 @@ +# Multimodal Content Guide + +Working with images, documents, and PDFs in Claude conversations. + +## Image Input + +### Base64 Images + +```python +import base64 + +with open("image.jpg", "rb") as f: + image_data = base64.standard_b64encode(f.read()).decode() + +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{ + "role": "user", + "content": [ + { + "type": "image", + "source": { + "type": "base64", + "media_type": "image/jpeg", + "data": image_data + } + }, + {"type": "text", "text": "Describe this image"} + ] + }] +) +``` + +### Supported Image Formats + +```python { .api } +# Supported MIME types +"image/jpeg" +"image/png" +"image/gif" +"image/webp" +``` + +### Multiple Images + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{ + "role": "user", + "content": [ + {"type": "image", "source": {"type": "base64", "media_type": "image/jpeg", "data": img1}}, + {"type": "image", "source": {"type": "base64", "media_type": "image/jpeg", "data": img2}}, + {"type": "text", "text": "Compare these images"} + ] + }] +) +``` + +## PDF Documents + +### Base64 PDF + +```python +import base64 + +with open("document.pdf", "rb") as f: + pdf_data = base64.standard_b64encode(f.read()).decode() + +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=2048, + messages=[{ + "role": "user", + "content": [ + { + "type": "document", + "source": { + "type": "base64", + "media_type": "application/pdf", + "data": pdf_data + } + }, + {"type": "text", "text": "Summarize this document"} + ] + }] +) +``` + +### PDF URL + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=2048, + messages=[{ + "role": "user", + "content": [ + { + "type": "document", + "source": { + "type": "url", + "media_type": "application/pdf", + "url": "https://example.com/document.pdf" + } + }, + {"type": "text", "text": "Summarize this document"} + ] + }] +) +``` + +## Plain Text Documents + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{ + "role": "user", + "content": [ + { + "type": "document", + "source": { + "type": "text", + "media_type": "text/plain", + "data": 
"Long text content..." + } + }, + {"type": "text", "text": "Analyze this text"} + ] + }] +) +``` + +## Mixed Content + +Combine text, images, and documents: + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=2048, + messages=[{ + "role": "user", + "content": [ + {"type": "text", "text": "Review this presentation:"}, + {"type": "document", "source": {"type": "base64", "media_type": "application/pdf", "data": pdf_data}}, + {"type": "text", "text": "Here's the cover image:"}, + {"type": "image", "source": {"type": "base64", "media_type": "image/jpeg", "data": img_data}}, + {"type": "text", "text": "What are the key points?"} + ] + }] +) +``` + +## Image Analysis Tasks + +### Object Detection + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{ + "role": "user", + "content": [ + {"type": "image", "source": {...}}, + {"type": "text", "text": "List all objects visible in this image"} + ] + }] +) +``` + +### Text Extraction (OCR) + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{ + "role": "user", + "content": [ + {"type": "image", "source": {...}}, + {"type": "text", "text": "Extract all text from this image"} + ] + }] +) +``` + +### Image Comparison + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{ + "role": "user", + "content": [ + {"type": "text", "text": "Before:"}, + {"type": "image", "source": {"type": "base64", "media_type": "image/jpeg", "data": before_img}}, + {"type": "text", "text": "After:"}, + {"type": "image", "source": {"type": "base64", "media_type": "image/jpeg", "data": after_img}}, + {"type": "text", "text": "What changed?"} + ] + }] +) +``` + +## Document Analysis Tasks + +### Summarization + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=2048, + messages=[{ + "role": "user", + "content": [ + {"type": "document", "source": {...}}, + {"type": "text", "text": "Provide a brief summary"} + ] + }] +) +``` + +### Q&A on Documents + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{ + "role": "user", + "content": [ + {"type": "document", "source": {...}}, + {"type": "text", "text": "What is the main conclusion?"} + ] + }] +) +``` + +### Citation Support (Beta) + +```python +message = client.beta.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=2048, + citations={"type": "enabled"}, + messages=[{ + "role": "user", + "content": [ + {"type": "document", "source": {...}}, + {"type": "text", "text": "Summarize with citations"} + ] + }] +) + +for block in message.content: + if hasattr(block, 'citations'): + for citation in block.citations: + print(f"Citation: {citation.cited_text}") +``` + +## Best Practices + +### 1. Optimize Image Size + +```python +from PIL import Image +import io +import base64 + +def optimize_image(image_path, max_size=(1024, 1024)): + img = Image.open(image_path) + img.thumbnail(max_size, Image.Resampling.LANCZOS) + + buffer = io.BytesIO() + img.save(buffer, format="JPEG", quality=85) + return base64.standard_b64encode(buffer.getvalue()).decode() + +optimized_data = optimize_image("large_image.jpg") +``` + +### 2. Handle Large PDFs + +For very large PDFs, consider splitting or summarizing sections. + +### 3. 
Specify Context + +Always provide clear instructions about what you want from the content. + +## See Also + +- [Messages API](../api/messages.md) - Content block types +- [Beta Features](../beta/index.md) - Citations feature +- [Getting Started](./getting-started.md) - Basic usage diff --git a/.tessl/tiles/tessl/pypi-anthropic/docs/guides/streaming-guide.md b/.tessl/tiles/tessl/pypi-anthropic/docs/guides/streaming-guide.md new file mode 100644 index 0000000..859fd6c --- /dev/null +++ b/.tessl/tiles/tessl/pypi-anthropic/docs/guides/streaming-guide.md @@ -0,0 +1,221 @@ +# Streaming Guide + +Advanced patterns for streaming responses from Claude. + +## Basic Streaming + +```python +with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "Write a story"}] +) as stream: + for text in stream.text_stream: + print(text, end="", flush=True) +print() +``` + +## Event-Based Streaming + +```python +with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "Hello"}] +) as stream: + for event in stream: + if event.type == "message_start": + print("Stream started") + elif event.type == "content_block_start": + print(f"\nContent block {event.index} started") + elif event.type == "content_block_delta": + if event.delta.type == "text_delta": + print(event.delta.text, end="", flush=True) + elif event.type == "message_stop": + print("\nStream completed") +``` + +## Get Final Message + +```python +with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "What is 2+2?"}] +) as stream: + for text in stream.text_stream: + print(text, end="", flush=True) + +# Get complete message after streaming +message = stream.get_final_message() +print(f"\nTokens used: {message.usage.output_tokens}") +``` + +## Stream with Tool Use + +```python +with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + tools=[{ + "name": "get_weather", + "description": "Get weather", + "input_schema": { + "type": "object", + "properties": {"location": {"type": "string"}}, + "required": ["location"] + } + }], + messages=[{"role": "user", "content": "What's the weather in SF?"}] +) as stream: + for event in stream: + if event.type == "content_block_delta": + if event.delta.type == "text_delta": + print(event.delta.text, end="", flush=True) + elif event.delta.type == "input_json_delta": + print(event.delta.partial_json, end="") + +message = stream.get_final_message() +``` + +## Async Streaming + +```python +import asyncio +from anthropic import AsyncAnthropic + +async def main(): + client = AsyncAnthropic() + + async with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "Write a haiku"}] + ) as stream: + async for text in stream.text_stream: + print(text, end="", flush=True) + print() + +asyncio.run(main()) +``` + +## Track Token Usage + +```python +with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "Write an essay"}] +) as stream: + tokens_used = 0 + + for event in stream: + if event.type == "message_delta": + tokens_used = event.usage.output_tokens + print(f"\nTokens: {tokens_used}", end="\r") + elif event.type == "content_block_delta": + if event.delta.type == "text_delta": + print(event.delta.text, end="", flush=True) +``` + +## Stream with Beta Features + +```python 
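# Note: when extended thinking is enabled, the API generally also expects a
# token budget, e.g. thinking={"type": "enabled", "budget_tokens": 1024}
# (the budget value here is illustrative).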
+with client.beta.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=2048, + thinking={"type": "enabled"}, + messages=[{"role": "user", "content": "Explain quantum computing"}] +) as stream: + for event in stream: + if event.type == "content_block_delta": + if event.delta.type == "thinking_delta": + print(f"[Thinking: {event.delta.thinking}]") + elif event.delta.type == "text_delta": + print(event.delta.text, end="", flush=True) +``` + +## Error Handling + +```python +from anthropic import APIError + +try: + with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "Hello"}] + ) as stream: + for text in stream.text_stream: + print(text, end="", flush=True) +except APIError as e: + print(f"\nStream error: {e.message}") +``` + +## Buffered Streaming + +Buffer output for smoother display: + +```python +import time + +with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "Write a story"}] +) as stream: + buffer = "" + + for text in stream.text_stream: + buffer += text + + # Flush buffer every 10 characters or at punctuation + if len(buffer) >= 10 or text in ".!?\n": + print(buffer, end="", flush=True) + buffer = "" + time.sleep(0.01) # Smooth animation + + if buffer: # Flush remaining + print(buffer, end="", flush=True) +``` + +## Best Practices + +### 1. Use Context Managers + +Always use `with` statement to ensure proper cleanup: + +```python +with client.messages.stream(...) as stream: + ... +# Stream automatically closed +``` + +### 2. Handle Interruptions + +```python +import signal + +def signal_handler(sig, frame): + print("\nStream interrupted") + stream.close() + sys.exit(0) + +signal.signal(signal.SIGINT, signal_handler) +``` + +### 3. Set Appropriate Timeouts + +```python +import httpx + +client = Anthropic( + timeout=httpx.Timeout(120.0) # 2 minutes for streaming +) +``` + +## See Also + +- [Streaming API](../api/streaming.md) - Complete API reference +- [Messages API](../api/messages.md) - Message creation +- [Beta Features](../beta/index.md) - Extended thinking streaming diff --git a/.tessl/tiles/tessl/pypi-anthropic/docs/guides/tool-usage.md b/.tessl/tiles/tessl/pypi-anthropic/docs/guides/tool-usage.md new file mode 100644 index 0000000..a1a55a3 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-anthropic/docs/guides/tool-usage.md @@ -0,0 +1,342 @@ +# Tool Usage Guide + +Comprehensive guide to using tools (function calling) with Claude for building agentic workflows. + +## Tool Basics + +Tools let Claude call functions you define, enabling it to take actions and retrieve information. + +### Manual Tool Definition + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + tools=[{ + "name": "get_weather", + "description": "Get weather for a location", + "input_schema": { + "type": "object", + "properties": { + "location": {"type": "string", "description": "City and state"} + }, + "required": ["location"] + } + }], + messages=[{"role": "user", "content": "What's the weather in SF?"}] +) +``` + +### Using Tool Decorators + +```python +from anthropic import beta_tool + +@beta_tool +def get_weather(location: str, unit: str = "fahrenheit") -> dict: + """ + Get weather for a location. + + Args: + location: City and state, e.g. 
San Francisco, CA + unit: Temperature unit (celsius or fahrenheit) + """ + # Implementation + return {"temperature": 72, "condition": "sunny", "unit": unit} + +message = client.beta.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + tools=[get_weather], + messages=[{"role": "user", "content": "What's the weather in NYC?"}] +) +``` + +## Tool Execution Flow + +### Manual Execution + +```python +# 1. Send initial request +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + tools=[...], + messages=[{"role": "user", "content": "What's the weather in Paris?"}] +) + +# 2. Extract tool use +tool_use = next(block for block in message.content if block.type == "tool_use") + +# 3. Execute function +result = get_weather(location=tool_use.input["location"]) + +# 4. Send result back +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + tools=[...], + messages=[ + {"role": "user", "content": "What's the weather in Paris?"}, + {"role": "assistant", "content": message.content}, + { + "role": "user", + "content": [{ + "type": "tool_result", + "tool_use_id": tool_use.id, + "content": str(result) + }] + } + ] +) +``` + +### Automatic Execution with Tool Runner + +```python +@beta_tool +def get_weather(location: str) -> dict: + """Get weather for location.""" + return {"temp": 72, "condition": "sunny"} + +@beta_tool +def get_time(timezone: str = "UTC") -> str: + """Get current time in timezone.""" + from datetime import datetime + return datetime.now().strftime("%H:%M") + +for message in client.beta.messages.tool_runner( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + tools=[get_weather, get_time], + messages=[{"role": "user", "content": "What's the weather and time in SF?"}] +): + if message.stop_reason == "end_turn": + print(message.content[0].text) +``` + +## Tool Choice Control + +### Auto (Default) + +Let Claude decide whether to use tools: + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + tools=[...], + tool_choice={"type": "auto"}, + messages=[...] +) +``` + +### Force Any Tool + +Require Claude to use at least one tool: + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + tools=[...], + tool_choice={"type": "any"}, + messages=[...] +) +``` + +### Force Specific Tool + +Require Claude to use a specific tool: + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + tools=[...], + tool_choice={"type": "tool", "name": "get_weather"}, + messages=[...] +) +``` + +### Disable All Tools + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + tools=[...], + tool_choice={"type": "none"}, + messages=[...] +) +``` + +### Disable Parallel Tool Use + +Force sequential tool calls: + +```python +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + tools=[...], + tool_choice={ + "type": "auto", + "disable_parallel_tool_use": True + }, + messages=[...] 
+) +``` + +## Advanced Patterns + +### Stateful Tools + +```python +class Calculator: + def __init__(self): + self.memory = 0 + + @beta_tool + def calculate(self, expression: str) -> float: + """Evaluate mathematical expression.""" + result = eval(expression) + self.memory = result + return result + + @beta_tool + def recall(self) -> float: + """Recall last calculation result.""" + return self.memory + +calc = Calculator() + +for message in client.beta.messages.tool_runner( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + tools=[calc.calculate, calc.recall], + messages=[{"role": "user", "content": "Calculate 5*8, then add 10"}] +): + if message.stop_reason == "end_turn": + print(message.content[0].text) +``` + +### Error Handling in Tools + +```python +@beta_tool +def divide(a: float, b: float) -> float: + """Divide two numbers.""" + if b == 0: + raise ValueError("Cannot divide by zero") + return a / b + +# In tool result +try: + result = divide(10, 0) + tool_result = str(result) + is_error = False +except Exception as e: + tool_result = str(e) + is_error = True + +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + tools=[...], + messages=[ + ..., + { + "role": "user", + "content": [{ + "type": "tool_result", + "tool_use_id": tool_use.id, + "content": tool_result, + "is_error": is_error + }] + } + ] +) +``` + +### Async Tools + +```python +from anthropic import beta_async_tool + +@beta_async_tool +async def fetch_data(url: str) -> dict: + """Fetch data from URL.""" + async with httpx.AsyncClient() as client: + response = await client.get(url) + return response.json() + +# Use with async tool runner +async for message in client.beta.messages.tool_runner( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + tools=[fetch_data], + messages=[...] +): + if message.stop_reason == "end_turn": + print(message.content[0].text) +``` + +## Best Practices + +### 1. Clear Tool Descriptions + +```python +@beta_tool +def search_database( + query: str, + limit: int = 10, + category: str | None = None +) -> list[dict]: + """ + Search database for items matching query. + + Args: + query: Search keywords or phrase + limit: Maximum results to return (1-100) + category: Optional category filter (e.g., "electronics", "books") + + Returns: + List of matching items with id, name, price + """ + ... +``` + +### 2. Validate Tool Inputs + +```python +@beta_tool +def set_temperature(degrees: float, unit: str = "celsius") -> dict: + """Set thermostat temperature.""" + if unit not in ["celsius", "fahrenheit"]: + raise ValueError(f"Invalid unit: {unit}") + if degrees < -50 or degrees > 50: + raise ValueError(f"Temperature out of range: {degrees}") + # Set temperature + return {"status": "success", "temperature": degrees, "unit": unit} +``` + +### 3. 
Return Structured Data + +```python +@beta_tool +def get_user_info(user_id: str) -> dict: + """Get user information.""" + return { + "id": user_id, + "name": "Alice Smith", + "email": "alice@example.com", + "created_at": "2024-01-15" + } +``` + +## See Also + +- [Tool Use API](../api/tools.md) - Complete API reference +- [Beta Features](../beta/index.md) - Tool runner and advanced features +- [Messages API](../api/messages.md) - Message creation diff --git a/.tessl/tiles/tessl/pypi-anthropic/docs/index.md b/.tessl/tiles/tessl/pypi-anthropic/docs/index.md new file mode 100644 index 0000000..747b2a9 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-anthropic/docs/index.md @@ -0,0 +1,398 @@ +# Anthropic Python SDK + +The official Python library for the Anthropic REST API, providing type-safe access to Claude AI models with both sync and async support. + +## Package Information + +- **Package Name**: anthropic +- **Package Type**: Python SDK +- **Language**: Python 3.8+ +- **Installation**: `pip install anthropic` +- **Repository**: https://github.com/anthropics/anthropic-sdk-python +- **License**: MIT + +## Installation + +```bash +pip install anthropic +``` + +Platform-specific extras: +- `pip install anthropic[bedrock]` - AWS Bedrock +- `pip install anthropic[vertex]` - Google Vertex AI +- `pip install anthropic[aiohttp]` - Alternative async client + +## Quick Start + +### Basic Message + +```python { .api } +from anthropic import Anthropic + +client = Anthropic() # Reads ANTHROPIC_API_KEY from environment + +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "Hello, Claude"}] +) + +print(message.content[0].text) +``` + +### Async Message + +```python { .api } +from anthropic import AsyncAnthropic + +client = AsyncAnthropic() +message = await client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "Hello"}] +) +``` + +### Stream Response + +```python { .api } +with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "Write a story"}] +) as stream: + for text in stream.text_stream: + print(text, end="", flush=True) +``` + +## Available Models + +**Claude 4.5 (Latest):** +- `claude-opus-4-5-20250929` - Most capable +- `claude-sonnet-4-5-20250929` - Balanced (recommended) + +**Claude 3.5:** +- `claude-3-5-sonnet-20241022` - Previous Sonnet +- `claude-3-5-haiku-20241022` - Fast and cost-effective + +**[→ Complete model list and selection guide](./api/models.md)** + +## Common Tasks + +Choose based on your use case: + +**[Basic Messaging](./common-tasks/basic-messaging.md)** +- Simple text conversations +- System prompts +- Multi-turn conversations +- Temperature control + +**[Multimodal Input](./common-tasks/multimodal-input.md)** +- Image analysis (JPG, PNG, GIF, WebP) +- PDF document processing +- Mixed content (text + images + documents) + +**[Tool Integration](./common-tasks/tool-integration.md)** +- Function calling basics +- Auto-execution with tool_runner +- Async tools +- Error handling in tools + +**[Streaming Responses](./common-tasks/streaming-responses.md)** +- Real-time text streaming +- Event-based processing +- Token usage tracking +- Error handling + +**[Batch Processing](./guides/batch-processing.md)** +- Process thousands of requests +- 50% cost reduction +- High-throughput scenarios + +## API Quick Reference + +Fast lookup for method signatures and parameters: + 
+**[Messages API](./quick-reference/messages.md)** - Core message creation +- `create()` - Create single message +- `stream()` - Stream message response +- `count_tokens()` - Estimate token usage + +**[Streaming API](./quick-reference/streaming.md)** - Real-time response processing +- `stream()` - Context manager for streaming +- Event types and handling +- Helper methods + +**[Tools API](./quick-reference/tools.md)** - Function calling +- `@beta_tool` decorator +- `tool_runner()` - Auto-execution +- Manual tool handling + +**[Batches API](./quick-reference/batches.md)** - Async batch processing +- `create()` - Submit batch +- `retrieve()` - Check status +- `results()` - Get outputs + +**[Models API](./quick-reference/models.md)** - Model information +- `retrieve()` - Get model details +- `list()` - Browse available models + +## Detailed API Documentation + +In-depth reference with all parameters, types, and examples: + +### Core APIs + +- **[Messages API](./api/messages.md)** - Complete messages API reference with all parameters, types, and examples +- **[Streaming API](./api/streaming.md)** - Detailed streaming architecture, events, and patterns +- **[Tools API](./api/tools.md)** - Function calling with decorators and manual definitions +- **[Batches API](./api/batches.md)** - Batch processing for high-throughput use cases +- **[Models API](./api/models.md)** - Model information and selection +- **[Completions API](./api/completions.md)** - Legacy text completions API (deprecated, use Messages API instead) + +### Implementation Guides + +- **[Getting Started](./guides/getting-started.md)** - Installation, authentication, and first steps +- **[Multimodal Content](./guides/multimodal.md)** - Working with images, documents, and PDFs +- **[Tool Usage](./guides/tool-usage.md)** - Building agentic workflows with function calling +- **[Streaming Guide](./guides/streaming-guide.md)** - Advanced streaming patterns and best practices +- **[Batch Processing](./guides/batch-processing.md)** - Large-scale async message processing +- **[Error Handling](./guides/error-handling.md)** - Robust error management and retry strategies + +### Platform Integrations + +- **[AWS Bedrock](./platforms/bedrock.md)** - Use Claude on AWS infrastructure +- **[Google Vertex AI](./platforms/vertex.md)** - Use Claude on GCP infrastructure +- **[Azure AI Foundry](./platforms/foundry.md)** - Use Claude on Azure infrastructure + +### Reference Documentation + +- **[Client Configuration](./reference/client-config.md)** - Client initialization, timeouts, retries, HTTP options +- **[Type System](./reference/types.md)** - Complete Pydantic type definitions +- **[Error Handling](./reference/errors.md)** - Exception hierarchy and error types +- **[Pagination](./reference/pagination.md)** - List operation pagination patterns +- **[Utilities](./reference/utilities.md)** - Helper functions and utilities + +## Beta Features + +Access experimental capabilities via `client.beta` namespace: + +**[Beta Overview](./beta/index.md)** - Introduction to beta features + +**Message Enhancement Features:** +- **[Extended Thinking](./beta/message-features.md#extended-thinking)** - Long-form reasoning with budget control +- **[Citations](./beta/message-features.md#citations)** - Source attribution for document-based responses +- **[Web Search](./beta/message-features.md#web-search)** - Real-time web information retrieval +- **[Code Execution](./beta/message-features.md#code-execution)** - Python sandbox execution +- **[Computer 
Use](./beta/message-features.md#computer-use)** - GUI interaction capabilities +- **[MCP Integration](./beta/message-features.md#mcp-integration)** - Model Context Protocol tools + +**[→ All Message Features](./beta/message-features.md)** + +**Resource Management:** +- **[Skills API](./beta/skills.md)** - Create and manage reusable capabilities +- **[Files API](./beta/files.md)** - Upload and manage document files +- **[Beta Batches](./beta/batches.md)** - Batch processing with beta features + +## Client Configuration + +### Basic Setup + +```python { .api } +from anthropic import Anthropic + +# Environment variable (recommended) +client = Anthropic() # Uses ANTHROPIC_API_KEY + +# Explicit API key +client = Anthropic(api_key="your-api-key") + +# Context manager (automatic cleanup) +with Anthropic() as client: + message = client.messages.create(...) +``` + +### Common Configurations + +```python { .api } +import httpx + +# Custom timeout +client = Anthropic(timeout=120.0) + +# Granular timeout +client = Anthropic( + timeout=httpx.Timeout( + connect=10.0, + read=60.0, + write=10.0, + pool=10.0 + ) +) + +# Retry configuration +client = Anthropic(max_retries=5) + +# Custom headers +client = Anthropic( + default_headers={"X-Custom": "value"} +) +``` + +**[→ Complete configuration reference](./reference/client-config.md)** + +## Error Handling + +### Basic Pattern + +```python +from anthropic import APIError, RateLimitError + +try: + message = client.messages.create(...) +except RateLimitError as e: + retry_after = e.response.headers.get("retry-after") + print(f"Rate limited. Retry after {retry_after}s") +except APIError as e: + print(f"API error: {e.message}") +``` + +### Exception Hierarchy + +```python { .api } +AnthropicError +├── APIError +│ ├── APIStatusError +│ │ ├── BadRequestError (400) +│ │ ├── AuthenticationError (401) +│ │ ├── PermissionDeniedError (403) +│ │ ├── NotFoundError (404) +│ │ ├── RateLimitError (429) +│ │ ├── InternalServerError (≥500) +│ ├── APIConnectionError +│ ├── APITimeoutError +│ └── APIResponseValidationError +``` + +**[→ Complete error reference and retry patterns](./reference/errors.md)** + +**[→ Error handling guide with advanced patterns](./guides/error-handling.md)** + +## Environment Variables + +- `ANTHROPIC_API_KEY` - API key for authentication (required) +- `ANTHROPIC_BASE_URL` - Override base URL (optional) +- `ANTHROPIC_AUTH_TOKEN` - Bearer token alternative (optional) + +Platform-specific variables documented in platform guides. 
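+
+As a minimal sketch of how these variables feed client construction (each corresponds to a constructor default documented in the configuration reference; explicitly passed arguments are assumed to take precedence over the environment):
+
+```python
+import os
+
+from anthropic import Anthropic
+
+# API key auth: the client reads ANTHROPIC_API_KEY (and ANTHROPIC_BASE_URL, if set)
+os.environ["ANTHROPIC_API_KEY"] = "sk-ant-..."  # placeholder value for illustration
+client = Anthropic()
+
+# Bearer-token alternative: pass auth_token explicitly (or set ANTHROPIC_AUTH_TOKEN)
+client = Anthropic(auth_token=os.environ.get("ANTHROPIC_AUTH_TOKEN"))
+```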
+ +## SDK Architecture + +### Client Hierarchy + +- **Anthropic / AsyncAnthropic** - Main clients for direct API access +- **AnthropicBedrock / AsyncAnthropicBedrock** - AWS Bedrock integration +- **AnthropicVertex / AsyncAnthropicVertex** - Google Vertex AI integration +- **AnthropicFoundry / AsyncAnthropicFoundry** - Azure AI Foundry integration + +### Resource Structure + +```python { .api } +client.messages # Messages resource + .create() # Create message + .stream() # Stream message + .count_tokens() # Count tokens + .batches # Batches sub-resource + .create() # Create batch + .retrieve() # Get batch status + .list() # List batches + .cancel() # Cancel batch + .delete() # Delete batch + .results() # Get batch results + +client.beta # Beta features namespace + .messages # Beta messages with additional features + .create() # Create with beta features + .stream() # Stream with beta features + .tool_runner() # Auto-execute tools + .skills # Skills management + .files # File management + +client.models # Models information + .retrieve() # Get model info + .list() # List models +``` + +## Type System + +All requests and responses use Pydantic models for type safety: + +```python { .api } +class Message(BaseModel): + id: str + type: Literal["message"] + role: Literal["assistant"] + content: list[ContentBlock] + model: str + stop_reason: StopReason | None + usage: Usage + +ContentBlock = Union[TextBlock, ToolUseBlock] +StopReason = Literal["end_turn", "max_tokens", "stop_sequence", "tool_use"] +``` + +**[→ Complete type definitions](./reference/types.md)** + +## Decision Guide for Common Scenarios + +### "I need to send a message to Claude" +→ **[Basic Messaging](./common-tasks/basic-messaging.md)** or **[Messages API](./api/messages.md)** + +### "I need to process images or PDFs" +→ **[Multimodal Input](./common-tasks/multimodal-input.md)** or **[Multimodal Guide](./guides/multimodal.md)** + +### "I need Claude to call functions/use tools" +→ **[Tool Integration](./common-tasks/tool-integration.md)** or **[Tools API](./api/tools.md)** + +### "I need real-time streaming output" +→ **[Streaming Responses](./common-tasks/streaming-responses.md)** or **[Streaming API](./api/streaming.md)** + +### "I need to process thousands of messages" +→ **[Batch Processing Guide](./guides/batch-processing.md)** or **[Batches API](./api/batches.md)** + +### "I'm getting errors" +→ **[Error Reference](./reference/errors.md)** or **[Error Handling Guide](./guides/error-handling.md)** + +### "I need extended reasoning/thinking" +→ **[Beta Overview](./beta/index.md)** → **[Extended Thinking](./beta/message-features.md#extended-thinking)** + +### "I need web search or code execution" +→ **[Beta Overview](./beta/index.md)** → **[Message Features](./beta/message-features.md)** + +### "I'm using AWS/GCP/Azure" +→ **[Platform Integrations](#platform-integrations)** → Choose your platform + +## Package Constants + +```python { .api } +# Client Configuration Constants +DEFAULT_TIMEOUT: float = 600.0 # 10 minutes default timeout for requests +DEFAULT_MAX_RETRIES: int = 2 # Default number of retry attempts +DEFAULT_CONNECTION_LIMITS: httpx.Limits # Default HTTP connection pool limits + +# Legacy Text Completion Prompt Constants +HUMAN_PROMPT: str = "\n\nHuman:" # Legacy prompt marker for human messages +AI_PROMPT: str = "\n\nAssistant:" # Legacy prompt marker for AI responses + +# Sentinel Values +NOT_GIVEN: NotGiven # Sentinel indicating parameter not provided +``` + +**Note**: `HUMAN_PROMPT` and `AI_PROMPT` are legacy 
constants for the deprecated Text Completions API. Use the Messages API instead for new applications. + +## Support Resources + +- **API Documentation**: https://docs.anthropic.com +- **SDK Repository**: https://github.com/anthropics/anthropic-sdk-python +- **Model Pricing**: https://anthropic.com/pricing +- **Rate Limits**: https://docs.anthropic.com/rate-limits diff --git a/.tessl/tiles/tessl/pypi-anthropic/docs/platforms/bedrock.md b/.tessl/tiles/tessl/pypi-anthropic/docs/platforms/bedrock.md new file mode 100644 index 0000000..c08df71 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-anthropic/docs/platforms/bedrock.md @@ -0,0 +1,148 @@ +# AWS Bedrock Integration + +Use Claude models via AWS Bedrock with automatic authentication and region configuration. + +## Installation + +```bash +pip install anthropic[bedrock] +``` + +## Client Initialization + +```python { .api } +class AnthropicBedrock: + """Synchronous client for Claude on AWS Bedrock.""" + + def __init__( + self, + *, + aws_access_key: str | None = None, + aws_secret_key: str | None = None, + aws_session_token: str | None = None, + aws_region: str | None = None, + aws_profile: str | None = None, + timeout: float | httpx.Timeout = DEFAULT_TIMEOUT, + max_retries: int = DEFAULT_MAX_RETRIES, + default_headers: dict[str, str] | None = None, + http_client: httpx.Client | None = None, + ): + """ + Initialize Bedrock client. + + Parameters: + aws_access_key: AWS access key ID (or AWS_ACCESS_KEY_ID env var) + aws_secret_key: AWS secret access key (or AWS_SECRET_ACCESS_KEY env var) + aws_session_token: AWS session token (or AWS_SESSION_TOKEN env var) + aws_region: AWS region (or AWS_REGION env var, default: us-east-1) + aws_profile: AWS profile from ~/.aws/credentials + timeout: Request timeout + max_retries: Maximum retry attempts + default_headers: Custom headers + http_client: Custom httpx.Client + """ + ... + +class AsyncAnthropicBedrock: + """Asynchronous client for Claude on AWS Bedrock.""" + # Same parameters as AnthropicBedrock + ... +``` + +## Bedrock Model Identifiers + +```python { .api } +# Claude 3.5 on Bedrock +"anthropic.claude-3-5-sonnet-20241022-v2:0" +"anthropic.claude-3-5-sonnet-20240620-v1:0" +"anthropic.claude-3-5-haiku-20241022-v1:0" + +# Claude 3 on Bedrock +"anthropic.claude-3-opus-20240229-v1:0" +"anthropic.claude-3-sonnet-20240229-v1:0" +"anthropic.claude-3-haiku-20240307-v1:0" +``` + +## Quick Examples + +### Basic Usage + +```python +from anthropic import AnthropicBedrock + +client = AnthropicBedrock() # Uses environment variables + +message = client.messages.create( + model="anthropic.claude-3-5-sonnet-20241022-v2:0", + max_tokens=1024, + messages=[{"role": "user", "content": "Hello!"}] +) +``` + +### Explicit Credentials + +```python +client = AnthropicBedrock( + aws_access_key="AKIA...", + aws_secret_key="wJal...", + aws_region="us-east-1" +) +``` + +### Using AWS Profile + +```python +client = AnthropicBedrock( + aws_profile="production", + aws_region="us-west-2" +) +``` + +### Streaming + +```python +with client.messages.stream( + model="anthropic.claude-3-5-sonnet-20241022-v2:0", + max_tokens=1024, + messages=[{"role": "user", "content": "Write a story"}] +) as stream: + for text in stream.text_stream: + print(text, end="", flush=True) +``` + +### Multi-Region Deployment + +```python +regions = ["us-east-1", "us-west-2", "eu-west-1"] + +for region in regions: + client = AnthropicBedrock(aws_region=region) + message = client.messages.create(...) 
+ client.close() +``` + +### Async Client + +```python +import asyncio +from anthropic import AsyncAnthropicBedrock + +async def main(): + client = AsyncAnthropicBedrock(aws_region="us-east-1") + message = await client.messages.create(...) + await client.close() + +asyncio.run(main()) +``` + +## Environment Variables + +- `AWS_ACCESS_KEY_ID` - AWS access key +- `AWS_SECRET_ACCESS_KEY` - AWS secret key +- `AWS_SESSION_TOKEN` - AWS session token +- `AWS_REGION` - AWS region (default: us-east-1) + +## See Also + +- [Messages API](../api/messages.md) - Core message creation +- [Streaming API](../api/streaming.md) - Streaming responses diff --git a/.tessl/tiles/tessl/pypi-anthropic/docs/platforms/foundry.md b/.tessl/tiles/tessl/pypi-anthropic/docs/platforms/foundry.md new file mode 100644 index 0000000..6868c08 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-anthropic/docs/platforms/foundry.md @@ -0,0 +1,144 @@ +# Azure AI Foundry Integration + +Access Claude models via Azure AI Foundry with API key or Azure AD authentication. + +## Overview + +Azure AI Foundry provides access to Claude through Microsoft's Azure AI platform with: +- Azure API key authentication +- Azure Active Directory (AAD) token provider authentication +- Automatic base URL construction from resource names + +## Client Initialization + +```python { .api } +class AnthropicFoundry: + def __init__( + self, + *, + resource: str | None = None, + api_key: str | None = None, + azure_ad_token_provider: Callable[[], str] | None = None, + base_url: str | None = None, + timeout: float | httpx.Timeout = DEFAULT_TIMEOUT, + max_retries: int = DEFAULT_MAX_RETRIES, + default_headers: dict[str, str] | None = None, + http_client: httpx.Client | None = None, + ): + """ + Initialize Azure AI Foundry client. + + Parameters: + resource: Azure resource name (e.g., "my-resource" for + https://my-resource.services.ai.azure.com/anthropic/) + api_key: Azure API key (or ANTHROPIC_FOUNDRY_API_KEY env var) + azure_ad_token_provider: Function returning Azure AD token + base_url: Full base URL (mutually exclusive with resource) + timeout: Request timeout + max_retries: Maximum retry attempts + default_headers: Custom headers + http_client: Custom httpx.Client + + Environment Variables: + ANTHROPIC_FOUNDRY_API_KEY: Default API key + ANTHROPIC_FOUNDRY_RESOURCE: Default resource name + ANTHROPIC_FOUNDRY_BASE_URL: Default base URL + + Note: + Must provide either api_key or azure_ad_token_provider. + Must provide either resource or base_url. + """ + ... + +class AsyncAnthropicFoundry: + # Same parameters, but azure_ad_token_provider can return Awaitable[str] + ... 
+``` + +## Quick Examples + +### API Key Authentication + +```python +from anthropic import AnthropicFoundry + +client = AnthropicFoundry( + resource="my-resource", + api_key="your-api-key" +) + +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "Hello!"}] +) +``` + +### Azure AD Token Provider + +```python +def get_azure_ad_token(): + """Fetch Azure AD token using your preferred method.""" + # Implement token retrieval + return "your-azure-ad-token" + +client = AnthropicFoundry( + resource="my-resource", + azure_ad_token_provider=get_azure_ad_token +) +``` + +### Using Base URL Directly + +```python +client = AnthropicFoundry( + base_url="https://my-resource.services.ai.azure.com/anthropic/", + api_key="your-api-key" +) +``` + +### Async Client + +```python +import asyncio +from anthropic import AsyncAnthropicFoundry + +async def main(): + client = AsyncAnthropicFoundry( + resource="my-resource", + api_key="your-api-key" + ) + message = await client.messages.create(...) + +asyncio.run(main()) +``` + +### Streaming + +```python +with client.messages.stream( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + messages=[{"role": "user", "content": "Write a haiku"}] +) as stream: + for text in stream.text_stream: + print(text, end="", flush=True) +``` + +## Limitations + +The Azure AI Foundry integration has the following limitations: +1. **No Models API**: `client.models` resource not available +2. **No Message Batches**: `client.messages.batches` resource not available +3. **No Beta Batches**: `client.beta.messages.batches` resource not available + +## Environment Variables + +- `ANTHROPIC_FOUNDRY_API_KEY` - Default API key +- `ANTHROPIC_FOUNDRY_RESOURCE` - Default resource name +- `ANTHROPIC_FOUNDRY_BASE_URL` - Default base URL + +## See Also + +- [Messages API](../api/messages.md) - Core message creation +- [Streaming API](../api/streaming.md) - Streaming responses diff --git a/.tessl/tiles/tessl/pypi-anthropic/docs/platforms/vertex.md b/.tessl/tiles/tessl/pypi-anthropic/docs/platforms/vertex.md new file mode 100644 index 0000000..3756993 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-anthropic/docs/platforms/vertex.md @@ -0,0 +1,157 @@ +# Google Vertex AI Integration + +Access Claude models on Google Cloud Platform with Vertex AI integration. + +## Installation + +```bash +pip install anthropic[vertex] +``` + +## Client Initialization + +```python { .api } +class AnthropicVertex: + """Synchronous client for Claude on Google Vertex AI.""" + + def __init__( + self, + *, + project_id: str | None = None, + region: str | None = None, + timeout: float | httpx.Timeout = DEFAULT_TIMEOUT, + max_retries: int = DEFAULT_MAX_RETRIES, + default_headers: dict[str, str] | None = None, + http_client: httpx.Client | None = None, + ): + """ + Initialize Vertex AI client. + + Parameters: + project_id: GCP project ID (or CLOUD_ML_PROJECT_ID/GOOGLE_CLOUD_PROJECT env var) + region: GCP region (or CLOUD_ML_REGION env var, default: us-east5) + timeout: Request timeout + max_retries: Maximum retry attempts + default_headers: Custom headers + http_client: Custom httpx.Client + """ + ... + +class AsyncAnthropicVertex: + """Asynchronous client for Claude on Google Vertex AI.""" + # Same parameters as AnthropicVertex + ... 
+``` + +## Vertex Model Identifiers + +```python { .api } +# Claude models on Vertex AI +"claude-3-5-sonnet-v2@20241022" +"claude-3-5-sonnet@20240620" +"claude-3-5-haiku@20241022" +"claude-3-opus@20240229" +"claude-3-sonnet@20240229" +"claude-3-haiku@20240307" +``` + +## Quick Examples + +### Basic Usage + +```python +from anthropic import AnthropicVertex + +client = AnthropicVertex( + project_id="my-gcp-project", + region="us-east5" +) + +message = client.messages.create( + model="claude-3-5-sonnet-v2@20241022", + max_tokens=1024, + messages=[{"role": "user", "content": "Hello!"}] +) +``` + +### Using Environment Variables + +```python +# Set: CLOUD_ML_PROJECT_ID=my-gcp-project +# Set: CLOUD_ML_REGION=us-east5 + +client = AnthropicVertex() # Automatically uses env vars +``` + +### Using Application Default Credentials + +```python +# First authenticate: gcloud auth application-default login + +client = AnthropicVertex( + project_id="my-gcp-project", + region="us-east5" +) +``` + +### Streaming + +```python +with client.messages.stream( + model="claude-3-5-sonnet-v2@20241022", + max_tokens=1024, + messages=[{"role": "user", "content": "Write a story"}] +) as stream: + for text in stream.text_stream: + print(text, end="", flush=True) +``` + +### Multi-Region Deployment + +```python +regions = ["us-east5", "europe-west1", "asia-southeast1"] + +for region in regions: + client = AnthropicVertex( + project_id="my-gcp-project", + region=region + ) + message = client.messages.create(...) + client.close() +``` + +### Async Client + +```python +import asyncio +from anthropic import AsyncAnthropicVertex + +async def main(): + client = AsyncAnthropicVertex( + project_id="my-gcp-project", + region="us-east5" + ) + message = await client.messages.create(...) + await client.close() + +asyncio.run(main()) +``` + +## Environment Variables + +- `CLOUD_ML_PROJECT_ID` or `GOOGLE_CLOUD_PROJECT` - GCP project ID +- `CLOUD_ML_REGION` - GCP region (default: us-east5) +- `GOOGLE_APPLICATION_CREDENTIALS` - Path to service account key JSON + +## Available Regions + +- `us-east5` +- `us-central1` +- `europe-west1` +- `europe-west4` +- `asia-southeast1` + +## See Also + +- [Messages API](../api/messages.md) - Core message creation +- [Streaming API](../api/streaming.md) - Streaming responses diff --git a/.tessl/tiles/tessl/pypi-anthropic/docs/quick-reference/batches.md b/.tessl/tiles/tessl/pypi-anthropic/docs/quick-reference/batches.md new file mode 100644 index 0000000..3e9dfd4 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-anthropic/docs/quick-reference/batches.md @@ -0,0 +1,157 @@ +# Batches API - Quick Reference + +Compact API signatures for batch processing. For examples, see **[Batches API Reference](../api/batches.md)**. + +## create() + +```python { .api } +def create( + self, + *, + requests: list[MessageBatchIndividualRequest], # Required: Batch requests + **kwargs +) -> MessageBatch +``` + +**Async:** `async def create(...) -> MessageBatch` + +## retrieve() + +```python { .api } +def retrieve( + self, + message_batch_id: str, # Required: Batch ID + **kwargs +) -> MessageBatch +``` + +**Async:** `async def retrieve(...) -> MessageBatch` + +## list() + +```python { .api } +def list( + self, + *, + before_id: str = NOT_GIVEN, + after_id: str = NOT_GIVEN, + limit: int = NOT_GIVEN, + **kwargs +) -> SyncPage[MessageBatch] +``` + +**Async:** `async def list(...) 
-> AsyncPage[MessageBatch]` + +## cancel() + +```python { .api } +def cancel( + self, + message_batch_id: str, # Required: Batch ID to cancel + **kwargs +) -> MessageBatch +``` + +**Async:** `async def cancel(...) -> MessageBatch` + +## delete() + +```python { .api } +def delete( + self, + message_batch_id: str, # Required: Batch ID to delete + **kwargs +) -> DeletedMessageBatch +``` + +**Async:** `async def delete(...) -> DeletedMessageBatch` + +## results() + +```python { .api } +def results( + self, + message_batch_id: str, # Required: Batch ID + **kwargs +) -> JSONLDecoder[MessageBatchIndividualResponse]: + """Stream batch results as JSONL""" + ... +``` + +**Async:** `async def results(...) -> AsyncJSONLDecoder[MessageBatchIndividualResponse]` + +## Key Types + +```python { .api } +class MessageBatchIndividualRequest(TypedDict): + custom_id: str # Your request identifier + params: MessageCreateParams # Same as messages.create() params + +class MessageBatch(BaseModel): + id: str + type: Literal["message_batch"] + processing_status: Literal["in_progress", "canceling", "ended"] + request_counts: MessageBatchRequestCounts + ended_at: str | None + created_at: str + expires_at: str + cancel_initiated_at: str | None + results_url: str | None + +class MessageBatchRequestCounts(BaseModel): + processing: int + succeeded: int + errored: int + canceled: int + expired: int + +class MessageBatchIndividualResponse(BaseModel): + custom_id: str + result: MessageBatchSucceededResult | MessageBatchErroredResult + +class MessageBatchSucceededResult(BaseModel): + type: Literal["succeeded"] + message: Message + +class MessageBatchErroredResult(BaseModel): + type: Literal["errored"] + error: ErrorObject +``` + +## Common Patterns + +```python +# Create batch +batch = client.messages.batches.create( + requests=[ + { + "custom_id": "req-1", + "params": { + "model": "claude-sonnet-4-5-20250929", + "max_tokens": 1024, + "messages": [{"role": "user", "content": "Hello"}] + } + } + ] +) + +# Poll until complete +import time +while True: + batch = client.messages.batches.retrieve(batch.id) + if batch.processing_status == "ended": + break + time.sleep(60) + +# Process results +for response in client.messages.batches.results(batch.id): + if response.result.type == "succeeded": + print(response.result.message.content[0].text) + elif response.result.type == "errored": + print(f"Error: {response.result.error.message}") +``` + +## See Also + +- **[Complete Batches Documentation](../api/batches.md)** - Full details and examples +- **[Batch Processing Guide](../guides/batch-processing.md)** - Advanced patterns and best practices diff --git a/.tessl/tiles/tessl/pypi-anthropic/docs/quick-reference/messages.md b/.tessl/tiles/tessl/pypi-anthropic/docs/quick-reference/messages.md new file mode 100644 index 0000000..57f4c2e --- /dev/null +++ b/.tessl/tiles/tessl/pypi-anthropic/docs/quick-reference/messages.md @@ -0,0 +1,97 @@ +# Messages API - Quick Reference + +Compact API signatures for the Messages API. For examples and detailed documentation, see **[Messages API Reference](../api/messages.md)**. 
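+
+A compact usage sketch tying the three methods together (parameter details are in the full reference linked above):
+
+```python
+from anthropic import Anthropic
+
+client = Anthropic()
+
+# create(): single message
+message = client.messages.create(
+    model="claude-sonnet-4-5-20250929",
+    max_tokens=1024,
+    messages=[{"role": "user", "content": "Hello"}],
+)
+
+# stream(): incremental text via context manager
+with client.messages.stream(
+    model="claude-sonnet-4-5-20250929",
+    max_tokens=1024,
+    messages=[{"role": "user", "content": "Hello"}],
+) as stream:
+    for text in stream.text_stream:
+        print(text, end="", flush=True)
+
+# count_tokens(): estimate input size before sending
+count = client.messages.count_tokens(
+    model="claude-sonnet-4-5-20250929",
+    messages=[{"role": "user", "content": "Hello"}],
+)
+print(count.input_tokens)
+```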
+ +## create() + +```python { .api } +def create( + self, + *, + model: str, # Required: Model ID + messages: list[MessageParam], # Required: Conversation messages + max_tokens: int, # Required: Max generation tokens + system: str | list[TextBlockParam] = NOT_GIVEN, # System prompt + metadata: MetadataParam = NOT_GIVEN, # Request metadata (user_id) + stop_sequences: list[str] = NOT_GIVEN, # Custom stop sequences (max 4) + stream: bool = False, # Enable streaming (use stream() instead) + temperature: float = NOT_GIVEN, # Sampling temperature 0.0-1.0 + top_p: float = NOT_GIVEN, # Nucleus sampling 0.0-1.0 + top_k: int = NOT_GIVEN, # Top-k sampling + tools: list[ToolParam] = NOT_GIVEN, # Available tools + tool_choice: ToolChoice = NOT_GIVEN, # Tool selection control + service_tier: Literal["auto", "standard_only"] = NOT_GIVEN, # Service tier + thinking: ThinkingConfigParam = NOT_GIVEN, # Extended thinking (beta) +) -> Message +``` + +**Async:** `async def create(...) -> Message` + +**Raises:** `BadRequestError`, `AuthenticationError`, `RateLimitError`, `InternalServerError` + +## stream() + +```python { .api } +def stream( + self, + *, + model: str, # Required: Model ID + messages: list[MessageParam], # Required: Conversation messages + max_tokens: int, # Required: Max generation tokens + **kwargs # All create() parameters supported +) -> MessageStreamManager +``` + +**Returns:** Context manager with `.text_stream`, `.get_final_message()`, `.get_final_text()` + +**Async:** `async def stream(...) -> AsyncMessageStreamManager` + +## count_tokens() + +```python { .api } +def count_tokens( + self, + *, + model: str, # Required: Model ID + messages: list[MessageParam], # Required: Messages to count + system: str | list[TextBlockParam] = NOT_GIVEN, # System prompt + tools: list[ToolParam] = NOT_GIVEN, # Tools to include + tool_choice: ToolChoice = NOT_GIVEN, # Tool choice config + thinking: ThinkingConfigParam = NOT_GIVEN, # Thinking config +) -> MessageTokensCount +``` + +**Returns:** `MessageTokensCount` with `.input_tokens` + +**Async:** `async def count_tokens(...) -> MessageTokensCount` + +## Key Types + +```python { .api } +class MessageParam(TypedDict): + role: Literal["user", "assistant"] + content: str | list[ContentBlockParam] + +class Message(BaseModel): + id: str + type: Literal["message"] + role: Literal["assistant"] + content: list[ContentBlock] # TextBlock | ToolUseBlock + model: str + stop_reason: StopReason | None + usage: Usage + +class Usage(BaseModel): + input_tokens: int + output_tokens: int + cache_creation_input_tokens: int | None + cache_read_input_tokens: int | None + +StopReason = Literal["end_turn", "max_tokens", "stop_sequence", "tool_use"] +``` + +## See Also + +- **[Complete API Documentation](../api/messages.md)** - Full details with examples +- **[Basic Messaging Tasks](../common-tasks/basic-messaging.md)** - Task-oriented guide +- **[Type System Reference](../reference/types.md)** - Complete type definitions diff --git a/.tessl/tiles/tessl/pypi-anthropic/docs/quick-reference/models.md b/.tessl/tiles/tessl/pypi-anthropic/docs/quick-reference/models.md new file mode 100644 index 0000000..2c27bdf --- /dev/null +++ b/.tessl/tiles/tessl/pypi-anthropic/docs/quick-reference/models.md @@ -0,0 +1,98 @@ +# Models API - Quick Reference + +Compact API signatures for model information. For examples, see **[Models API Reference](../api/models.md)**. 
+ +## retrieve() + +```python { .api } +def retrieve( + self, + model_id: str, # Required: Model identifier + **kwargs +) -> ModelInfo +``` + +**Async:** `async def retrieve(...) -> ModelInfo` + +## list() + +```python { .api } +def list( + self, + *, + before_id: str = NOT_GIVEN, + after_id: str = NOT_GIVEN, + limit: int = NOT_GIVEN, + **kwargs +) -> SyncPage[ModelInfo] +``` + +**Async:** `async def list(...) -> AsyncPage[ModelInfo]` + +## Response Type + +```python { .api } +class ModelInfo(BaseModel): + id: str # Model identifier + type: Literal["model"] + display_name: str # Human-readable name + created_at: str # ISO 8601 timestamp +``` + +## Available Model IDs + +```python { .api } +# Claude 4.5 (Latest) +"claude-opus-4-5-20250929" # Most capable model +"claude-sonnet-4-5-20250929" # Balanced intelligence and speed + +# Claude 3.5 +"claude-3-5-sonnet-20241022" # Previous Sonnet version +"claude-3-5-sonnet-20240620" # Earlier Sonnet version +"claude-3-5-haiku-20241022" # Fast, cost-effective + +# Claude 3 +"claude-3-opus-20240229" # Powerful, intelligent +"claude-3-sonnet-20240229" # Balanced +"claude-3-haiku-20240307" # Fast and efficient + +# Legacy (Claude 2) +"claude-2.1" +"claude-2.0" +"claude-instant-1.2" +``` + +## Common Patterns + +```python +# Get specific model info +model = client.models.retrieve("claude-sonnet-4-5-20250929") +print(f"{model.display_name} created: {model.created_at}") + +# List all models +for model in client.models.list(): + print(f"{model.id}: {model.display_name}") + +# Check if model exists +from anthropic import NotFoundError + +def model_exists(model_id: str) -> bool: + try: + client.models.retrieve(model_id) + return True + except NotFoundError: + return False +``` + +## Model Selection Guide + +**Choose based on requirements:** + +- **claude-opus-4-5-20250929** - Complex tasks requiring maximum capability and reasoning +- **claude-sonnet-4-5-20250929** - Balanced performance for most use cases (recommended) +- **claude-3-5-haiku-20241022** - Fast responses and cost-effective for simple tasks +- **claude-3-5-sonnet-20241022** - Previous Sonnet version for compatibility + +## See Also + +- **[Complete Models Documentation](../api/models.md)** - Full details and selection examples diff --git a/.tessl/tiles/tessl/pypi-anthropic/docs/quick-reference/streaming.md b/.tessl/tiles/tessl/pypi-anthropic/docs/quick-reference/streaming.md new file mode 100644 index 0000000..568c332 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-anthropic/docs/quick-reference/streaming.md @@ -0,0 +1,115 @@ +# Streaming API - Quick Reference + +Compact API signatures for streaming. For examples and patterns, see **[Streaming API Reference](../api/streaming.md)**. + +## stream() + +```python { .api } +def stream( + self, + *, + model: str, + messages: list[MessageParam], + max_tokens: int, + **kwargs # All messages.create() parameters supported +) -> MessageStreamManager +``` + +**Async:** `async def stream(...) -> AsyncMessageStreamManager` + +## MessageStreamManager + +```python { .api } +class MessageStreamManager: + def __enter__(self) -> MessageStream + def __exit__(self, exc_type, exc_val, exc_tb) -> None +``` + +## MessageStream + +```python { .api } +class MessageStream: + def __iter__(self) -> Iterator[MessageStreamEvent] + + @property + def text_stream(self) -> Iterator[str]: + """Iterate over text deltas only""" + ... + + @property + def current_message_snapshot(self) -> Message: + """Current accumulated message during streaming""" + ... 
+ + def get_final_message(self) -> Message: + """Complete message after stream ends""" + ... + + def get_final_text(self) -> str: + """Accumulated text after stream ends""" + ... +``` + +**Async:** `AsyncMessageStream` with `async` versions of all methods + +## Stream Events + +```python { .api } +MessageStreamEvent = Union[ + MessageStartEvent, # Stream started + MessageDeltaEvent, # Usage/stop_reason updated + MessageStopEvent, # Stream completed + ContentBlockStartEvent, # New content block + ContentBlockDeltaEvent, # Content delta + ContentBlockStopEvent, # Block completed +] + +class MessageStartEvent(BaseModel): + type: Literal["message_start"] + message: Message # Initial skeleton + +class ContentBlockDeltaEvent(BaseModel): + type: Literal["content_block_delta"] + index: int + delta: ContentBlockDelta # TextDelta | InputJSONDelta + +class TextDelta(BaseModel): + type: Literal["text_delta"] + text: str # Incremental text to append + +class MessageDeltaEvent(BaseModel): + type: Literal["message_delta"] + delta: MessageDelta # stop_reason, stop_sequence + usage: MessageDeltaUsage # output_tokens + +class MessageStopEvent(BaseModel): + type: Literal["message_stop"] +``` + +## Common Patterns + +```python +# Text streaming +with client.messages.stream(...) as stream: + for text in stream.text_stream: + print(text, end="", flush=True) + +# Event processing +with client.messages.stream(...) as stream: + for event in stream: + if event.type == "content_block_delta": + if event.delta.type == "text_delta": + print(event.delta.text, end="") + +# Get final message +with client.messages.stream(...) as stream: + for text in stream.text_stream: + print(text, end="") +message = stream.get_final_message() +``` + +## See Also + +- **[Complete Streaming Documentation](../api/streaming.md)** - All event types and patterns +- **[Streaming Tasks Guide](../common-tasks/streaming-responses.md)** - Task-oriented examples +- **[Streaming Guide](../guides/streaming-guide.md)** - Advanced patterns diff --git a/.tessl/tiles/tessl/pypi-anthropic/docs/quick-reference/tools.md b/.tessl/tiles/tessl/pypi-anthropic/docs/quick-reference/tools.md new file mode 100644 index 0000000..afc5484 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-anthropic/docs/quick-reference/tools.md @@ -0,0 +1,159 @@ +# Tools API - Quick Reference + +Compact API signatures for tool use (function calling). For examples, see **[Tools API Reference](../api/tools.md)**. + +## Decorators + +```python { .api } +def beta_tool(func: Callable) -> BetaFunctionTool: + """ + Convert sync function to tool with auto-generated schema. + Function must have type hints and docstring. + """ + ... + +def beta_async_tool(func: Callable) -> BetaAsyncFunctionTool: + """ + Convert async function to tool with auto-generated schema. + Function must have type hints and docstring. + """ + ... +``` + +## Tool Runner + +```python { .api } +def tool_runner( + self, + *, + model: str, + messages: list[MessageParam], + max_tokens: int, + tools: list[ToolParam | BetaFunctionTool], + tool_choice: ToolChoice = NOT_GIVEN, + **kwargs # Additional messages.create() parameters +) -> Iterator[BetaMessage]: + """ + Auto-execute tools in agentic loop. + Yields message after each turn until stop_reason == "end_turn". + """ + ... +``` + +**Async:** `async def tool_runner(...) 
-> AsyncIterator[BetaMessage]` + +## Tool Types + +```python { .api } +class ToolParam(TypedDict): + name: str # Tool identifier + description: str # Tool purpose (for Claude) + input_schema: dict[str, Any] # JSON Schema (draft 2020-12) + cache_control: NotRequired[CacheControlEphemeral] + +ToolChoice = Union[ + ToolChoiceAuto, # Let Claude decide (default) + ToolChoiceAny, # Force Claude to use any tool + ToolChoiceNone, # Disable tools + ToolChoiceTool, # Force specific tool +] + +class ToolChoiceAuto(TypedDict): + type: Literal["auto"] + disable_parallel_tool_use: NotRequired[bool] + +class ToolChoiceAny(TypedDict): + type: Literal["any"] + disable_parallel_tool_use: NotRequired[bool] + +class ToolChoiceTool(TypedDict): + type: Literal["tool"] + name: str # Tool name to force + disable_parallel_tool_use: NotRequired[bool] +``` + +## Response Types + +```python { .api } +class ToolUseBlock(BaseModel): + """Tool invocation in assistant response""" + type: Literal["tool_use"] + id: str # Use in tool_result + name: str # Tool name + input: dict[str, Any] # Tool parameters + +class ToolResultBlockParam(TypedDict): + """Tool result in user message""" + type: Literal["tool_result"] + tool_use_id: str # ID from ToolUseBlock + content: NotRequired[str | list[TextBlockParam | ImageBlockParam]] + is_error: NotRequired[bool] # True if tool execution failed + cache_control: NotRequired[CacheControlEphemeral] +``` + +## Function Tool Classes + +```python { .api } +class BetaFunctionTool: + name: str + description: str + input_schema: dict[str, Any] + func: Callable + + def __call__(self, **kwargs) -> Any: ... + def to_param(self) -> ToolParam: ... + +class BetaAsyncFunctionTool: + name: str + description: str + input_schema: dict[str, Any] + func: Callable + + async def __call__(self, **kwargs) -> Any: ... + def to_param(self) -> ToolParam: ... +``` + +## Common Patterns + +```python +# Decorator usage +@beta_tool +def get_weather(location: str, unit: str = "fahrenheit") -> dict: + """Get weather for location. + + Args: + location: City and state + unit: Temperature unit + """ + return {"temp": 72, "unit": unit} + +# Tool runner (auto-execution) +for message in client.beta.messages.tool_runner( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + tools=[get_weather], + messages=[{"role": "user", "content": "What's the weather in NYC?"}] +): + if message.stop_reason == "end_turn": + print(message.content[0].text) + +# Manual tool handling +message = client.messages.create( + model="claude-sonnet-4-5-20250929", + max_tokens=1024, + tools=[get_weather.to_param()], + messages=[...] +) + +# Extract tool use +for block in message.content: + if block.type == "tool_use": + result = get_weather(**block.input) + # Send result back in next request +``` + +## See Also + +- **[Complete Tools Documentation](../api/tools.md)** - Full details and examples +- **[Tool Integration Tasks](../common-tasks/tool-integration.md)** - Task-oriented guide +- **[Tool Usage Guide](../guides/tool-usage.md)** - Advanced patterns diff --git a/.tessl/tiles/tessl/pypi-anthropic/docs/reference/client-config.md b/.tessl/tiles/tessl/pypi-anthropic/docs/reference/client-config.md new file mode 100644 index 0000000..52b295c --- /dev/null +++ b/.tessl/tiles/tessl/pypi-anthropic/docs/reference/client-config.md @@ -0,0 +1,305 @@ +# Client Configuration Reference + +Initialize and configure Anthropic API clients for synchronous and asynchronous operations. 
+ +## Synchronous Client + +```python { .api } +class Anthropic: + """Synchronous client for Anthropic API.""" + + def __init__( + self, + *, + api_key: str | None = None, + auth_token: str | None = None, + base_url: str | httpx.URL | None = None, + timeout: float | httpx.Timeout = DEFAULT_TIMEOUT, + max_retries: int = DEFAULT_MAX_RETRIES, + default_headers: dict[str, str] | None = None, + default_query: dict[str, object] | None = None, + http_client: httpx.Client | None = None, + ): + """ + Initialize Anthropic client. + + Parameters: + api_key: API key (defaults to ANTHROPIC_API_KEY env var) + auth_token: Bearer token (alternative to api_key) + base_url: Override base URL (defaults to ANTHROPIC_BASE_URL env var) + timeout: Request timeout (default: 600s) + max_retries: Maximum retry attempts (default: 2) + default_headers: Headers added to all requests + default_query: Query parameters added to all requests + http_client: Custom httpx.Client instance + """ + ... + + def close(self) -> None: + """Close the underlying HTTP client.""" + ... + + def __enter__(self) -> Anthropic: + """Context manager entry.""" + ... + + def __exit__(self, exc_type, exc_val, exc_tb) -> None: + """Context manager exit.""" + ... + + @property + def messages(self) -> Messages: + """Access Messages resource.""" + ... + + @property + def beta(self) -> Beta: + """Access Beta resources.""" + ... + + @property + def models(self) -> Models: + """Access Models resource.""" + ... +``` + +## Asynchronous Client + +```python { .api } +class AsyncAnthropic: + """Asynchronous client for Anthropic API.""" + + def __init__( + self, + *, + api_key: str | None = None, + auth_token: str | None = None, + base_url: str | httpx.URL | None = None, + timeout: float | httpx.Timeout = DEFAULT_TIMEOUT, + max_retries: int = DEFAULT_MAX_RETRIES, + default_headers: dict[str, str] | None = None, + default_query: dict[str, object] | None = None, + http_client: httpx.AsyncClient | None = None, + ): + """Initialize async Anthropic client. Same parameters as Anthropic.""" + ... + + async def close(self) -> None: + """Close the underlying HTTP client.""" + ... + + async def __aenter__(self) -> AsyncAnthropic: + """Async context manager entry.""" + ... + + async def __aexit__(self, exc_type, exc_val, exc_tb) -> None: + """Async context manager exit.""" + ... +``` + +## Constants + +```python { .api } +DEFAULT_TIMEOUT: float = 600.0 # 10 minutes +DEFAULT_MAX_RETRIES: int = 2 +``` + +## Quick Examples + +### Basic Initialization + +```python +from anthropic import Anthropic + +# Using environment variable ANTHROPIC_API_KEY +client = Anthropic() + +# Explicit API key +client = Anthropic(api_key="your-api-key") +``` + +### Context Manager + +```python +with Anthropic() as client: + message = client.messages.create(...) 
+# Client automatically closed +``` + +### Custom Timeout + +```python +import httpx + +# Single timeout value (applies to all) +client = Anthropic(timeout=120.0) + +# Granular timeout control +client = Anthropic( + timeout=httpx.Timeout( + connect=10.0, # Connection timeout + read=60.0, # Read timeout + write=10.0, # Write timeout + pool=10.0, # Pool timeout + ) +) +``` + +### Retry Configuration + +```python +# Increase retries +client = Anthropic(max_retries=5) + +# Disable retries +client = Anthropic(max_retries=0) +``` + +### Custom Headers + +```python +client = Anthropic( + default_headers={ + "X-Custom-Header": "value", + "User-Agent": "MyApp/1.0", + } +) +``` + +### Custom Base URL + +```python +# Development environment +client = Anthropic( + base_url="https://api.dev.anthropic.com" +) + +# Using environment variable ANTHROPIC_BASE_URL +import os +os.environ["ANTHROPIC_BASE_URL"] = "https://api.dev.anthropic.com" +client = Anthropic() +``` + +### Custom HTTP Client + +```python +import httpx + +# Custom httpx client +http_client = httpx.Client( + proxy="http://proxy.example.com:8080", + limits=httpx.Limits(max_connections=100), +) + +client = Anthropic(http_client=http_client) +``` + +### Async Client + +```python +import asyncio +from anthropic import AsyncAnthropic + +async def main(): + client = AsyncAnthropic() + message = await client.messages.create(...) + await client.close() + +asyncio.run(main()) +``` + +### Async Context Manager + +```python +async def main(): + async with AsyncAnthropic() as client: + message = await client.messages.create(...) + # Client automatically closed +``` + +### Bearer Token Authentication + +```python +client = Anthropic(auth_token="your-bearer-token") +``` + +### HTTP Client with Proxy + +```python +import httpx + +client = Anthropic( + http_client=httpx.Client( + proxy="http://proxy.example.com:8080" + ) +) +``` + +### Connection Pool Configuration + +```python +import httpx + +client = Anthropic( + http_client=httpx.Client( + limits=httpx.Limits( + max_connections=100, + max_keepalive_connections=20, + ) + ) +) +``` + +### Request Per-Call Options + +```python +# Override timeout for specific request +message = client.messages.with_options(timeout=120.0).create(...) + +# Override headers +message = client.messages.with_options( + headers={"X-Request-ID": "abc123"} +).create(...) + +# Override max retries +message = client.messages.with_options(max_retries=5).create(...) +``` + +## HTTP Client Factories + +```python { .api } +class DefaultHttpxClient: + """Default synchronous HTTP client using httpx.""" + def __init__( + self, + *, + proxy: str | httpx.Proxy | None = None, + transport: httpx.HTTPTransport | None = None, + **kwargs + ): ... + +class DefaultAsyncHttpxClient: + """Default asynchronous HTTP client using httpx.""" + def __init__( + self, + *, + proxy: str | httpx.Proxy | None = None, + transport: httpx.AsyncHTTPTransport | None = None, + **kwargs + ): ... + +class DefaultAioHttpClient: + """Alternative async client using aiohttp (better concurrency).""" + def __init__(self, **kwargs): ... 
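+
+# Usage note (assumption, inferred from the aiohttp install extra): the aiohttp-backed
+# client requires `pip install anthropic[aiohttp]` and is intended for the async client,
+# e.g. AsyncAnthropic(http_client=DefaultAioHttpClient()).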
+``` + +## Environment Variables + +- `ANTHROPIC_API_KEY` - API key for authentication +- `ANTHROPIC_BASE_URL` - Override base URL +- `ANTHROPIC_AUTH_TOKEN` - Bearer token (alternative to API key) + +## See Also + +- [Error Handling](./errors.md) - Exception handling and retry logic +- [Utilities](./utilities.md) - HTTP client utilities diff --git a/.tessl/tiles/tessl/pypi-anthropic/docs/reference/errors.md b/.tessl/tiles/tessl/pypi-anthropic/docs/reference/errors.md new file mode 100644 index 0000000..60f490e --- /dev/null +++ b/.tessl/tiles/tessl/pypi-anthropic/docs/reference/errors.md @@ -0,0 +1,287 @@ +# Error Handling Reference + +Complete exception hierarchy and error handling patterns for the Anthropic Python SDK. + +## Exception Hierarchy + +```python { .api } +AnthropicError +├── APIError +│ ├── APIStatusError +│ │ ├── BadRequestError (400) +│ │ ├── AuthenticationError (401) +│ │ ├── PermissionDeniedError (403) +│ │ ├── NotFoundError (404) +│ │ ├── ConflictError (409) +│ │ ├── RequestTooLargeError (413) +│ │ ├── UnprocessableEntityError (422) +│ │ ├── RateLimitError (429) +│ │ ├── InternalServerError (≥500) +│ │ ├── ServiceUnavailableError (503) +│ │ ├── DeadlineExceededError (504) +│ │ └── OverloadedError (529) +│ ├── APIConnectionError +│ ├── APITimeoutError +│ └── APIResponseValidationError +``` + +## Exception Classes + +### Base Exceptions + +```python { .api } +class AnthropicError(Exception): + """Base exception for all Anthropic errors.""" + ... + +class APIError(AnthropicError): + """Base for all API-related errors.""" + message: str + request: httpx.Request | None + body: object | None +``` + +### HTTP Status Errors + +```python { .api } +class APIStatusError(APIError): + """HTTP status code error.""" + response: httpx.Response + status_code: int + request_id: str | None + +class BadRequestError(APIStatusError): + """400 - Invalid request.""" + ... + +class AuthenticationError(APIStatusError): + """401 - Invalid API key.""" + ... + +class PermissionDeniedError(APIStatusError): + """403 - Insufficient permissions.""" + ... + +class NotFoundError(APIStatusError): + """404 - Resource not found.""" + ... + +class ConflictError(APIStatusError): + """409 - Request conflicts with current state.""" + ... + +class RequestTooLargeError(APIStatusError): + """413 - Request payload too large.""" + ... + +class UnprocessableEntityError(APIStatusError): + """422 - Request semantically invalid.""" + ... + +class RateLimitError(APIStatusError): + """429 - Rate limit exceeded.""" + ... + +class InternalServerError(APIStatusError): + """500+ - Server error.""" + ... + +class ServiceUnavailableError(APIStatusError): + """503 - Service temporarily unavailable.""" + ... + +class DeadlineExceededError(APIStatusError): + """504 - Request exceeded deadline.""" + ... + +class OverloadedError(APIStatusError): + """529 - Service overloaded.""" + ... +``` + +### Connection Errors + +```python { .api } +class APIConnectionError(APIError): + """Failed to connect to API.""" + ... + +class APITimeoutError(APIError): + """Request timed out.""" + ... + +class APIResponseValidationError(APIError): + """Response validation failed.""" + ... 
+``` + +## Error Response Format + +```python { .api } +class ErrorObject(BaseModel): + """Error object in API responses.""" + type: str + message: str + +class ErrorResponse(BaseModel): + """Error response wrapper.""" + type: Literal["error"] + error: ErrorObject +``` + +## Quick Examples + +### Basic Error Handling + +```python +from anthropic import APIError + +try: + message = client.messages.create(...) +except APIError as e: + print(f"Error: {e.message}") +``` + +### Handle Specific Errors + +```python +from anthropic import ( + RateLimitError, + AuthenticationError, + BadRequestError, +) + +try: + message = client.messages.create(...) +except RateLimitError as e: + retry_after = e.response.headers.get("retry-after") + print(f"Rate limited. Retry after {retry_after}s") +except AuthenticationError: + print("Invalid API key") +except BadRequestError as e: + print(f"Invalid request: {e.message}") +``` + +### Retry with Exponential Backoff + +```python +import time +from anthropic import RateLimitError, InternalServerError + +def create_message_with_retry(max_retries=3): + for attempt in range(max_retries): + try: + return client.messages.create(...) + except (RateLimitError, InternalServerError) as e: + if attempt == max_retries - 1: + raise + wait_time = 2 ** attempt + print(f"Retry {attempt + 1} after {wait_time}s") + time.sleep(wait_time) +``` + +### Extract Request ID + +```python +from anthropic import APIStatusError + +try: + message = client.messages.create(...) +except APIStatusError as e: + print(f"Request ID: {e.request_id}") + print(f"Status: {e.status_code}") +``` + +### Validate Response + +```python +from anthropic import APIResponseValidationError + +try: + message = client.messages.create(...) +except APIResponseValidationError as e: + print(f"Response validation failed: {e.message}") +``` + +### Handle All Errors + +```python +from anthropic import ( + APIError, + APIConnectionError, + APITimeoutError, + RateLimitError, +) + +try: + message = client.messages.create(...) +except APITimeoutError: + print("Request timed out") +except APIConnectionError: + print("Connection failed") +except RateLimitError: + print("Rate limit exceeded") +except APIError as e: + print(f"API error: {e.message}") +``` + +## Best Practices + +### Always Handle Exceptions + +```python +# Bad +message = client.messages.create(...) + +# Good +try: + message = client.messages.create(...) +except APIError as e: + # Handle error + ... +``` + +### Use Specific Exception Types + +```python +# Less precise +try: + message = client.messages.create(...) +except Exception: + ... + +# More precise +try: + message = client.messages.create(...) +except RateLimitError: + # Handle rate limit + ... +except APIError: + # Handle other API errors + ... +``` + +### Log Request Context + +```python +import logging + +logger = logging.getLogger(__name__) + +try: + message = client.messages.create(...) 
+except APIStatusError as e: + logger.error( + "API request failed", + extra={ + "request_id": e.request_id, + "status_code": e.status_code, + "error_message": e.message, + } + ) +``` + +## See Also + +- [Client Configuration](./client-config.md) - Configure retry and timeout behavior +- [Error Handling Guide](../guides/error-handling.md) - Advanced error handling patterns diff --git a/.tessl/tiles/tessl/pypi-anthropic/docs/reference/pagination.md b/.tessl/tiles/tessl/pypi-anthropic/docs/reference/pagination.md new file mode 100644 index 0000000..bfd82a6 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-anthropic/docs/reference/pagination.md @@ -0,0 +1,615 @@ +# Pagination Reference + +Auto-paginating iterators for list operations with manual control options. The SDK provides three pagination types: ID-based, token-based, and cursor-based. + +## Pagination Classes + +### Synchronous Pagination + +```python { .api } +class SyncPage(Generic[T]): + """ + Synchronous ID-based pagination. + + Provides: + - Automatic iteration over all items + - Manual page control + - Page metadata access + + Attributes: + data: Items in current page + """ + data: list[T] + + def __iter__(self) -> Iterator[T]: + """ + Iterate over all items across all pages automatically. + + Yields: + Individual items from current and subsequent pages + """ + ... + + def __getitem__(self, index: int) -> T: + """ + Get item by index in current page. + + Parameters: + index: Item index in current page + + Returns: + Item at index + + Raises: + IndexError: If index out of range + """ + ... + + def has_next_page(self) -> bool: + """ + Check if another page exists. + + Returns: + True if next page available + """ + ... + + def next_page_info(self) -> dict[str, Any]: + """ + Get information needed to fetch next page. + + Returns: + Dictionary with pagination parameters (after_id, limit, etc.) + """ + ... + + def get_next_page(self) -> SyncPage[T]: + """ + Fetch next page. + + Returns: + New SyncPage for next page + + Raises: + ValueError: If no next page exists + """ + ... + +class SyncTokenPage(Generic[T]): + """ + Synchronous token-based pagination. + + Similar to SyncPage but uses continuation tokens instead of IDs. + + Attributes: + data: Items in current page + """ + data: list[T] + + def __iter__(self) -> Iterator[T]: + """Iterate over all items automatically.""" + ... + + def has_next_page(self) -> bool: + """Check if next page exists.""" + ... + + def next_page_info(self) -> dict[str, Any]: + """Get next page token.""" + ... + + def get_next_page(self) -> SyncTokenPage[T]: + """Fetch next page.""" + ... + +class SyncPageCursor(Generic[T]): + """ + Synchronous cursor-based pagination. + + Uses cursors for pagination instead of IDs or tokens. + + Attributes: + data: Items in current page + """ + data: list[T] + + def __iter__(self) -> Iterator[T]: + """Iterate over all items automatically.""" + ... + + def has_next_page(self) -> bool: + """Check if next page exists.""" + ... + + def next_page_info(self) -> dict[str, Any]: + """Get next page cursor.""" + ... + + def get_next_page(self) -> SyncPageCursor[T]: + """Fetch next page.""" + ... +``` + +### Asynchronous Pagination + +```python { .api } +class AsyncPage(Generic[T]): + """ + Asynchronous ID-based pagination. 
+ + Provides: + - Async automatic iteration over all items + - Async manual page control + - Page metadata access + + Attributes: + data: Items in current page + """ + data: list[T] + + def __aiter__(self) -> AsyncIterator[T]: + """ + Async iterate over all items across all pages. + + Yields: + Individual items from current and subsequent pages + """ + ... + + def __getitem__(self, index: int) -> T: + """Get item by index in current page.""" + ... + + async def has_next_page(self) -> bool: + """Check if another page exists.""" + ... + + async def next_page_info(self) -> dict[str, Any]: + """Get information needed to fetch next page.""" + ... + + async def get_next_page(self) -> AsyncPage[T]: + """Fetch next page.""" + ... + +class AsyncTokenPage(Generic[T]): + """ + Asynchronous token-based pagination. + + Attributes: + data: Items in current page + """ + data: list[T] + + def __aiter__(self) -> AsyncIterator[T]: ... + async def has_next_page(self) -> bool: ... + async def next_page_info(self) -> dict[str, Any]: ... + async def get_next_page(self) -> AsyncTokenPage[T]: ... + +class AsyncPageCursor(Generic[T]): + """ + Asynchronous cursor-based pagination. + + Attributes: + data: Items in current page + """ + data: list[T] + + def __aiter__(self) -> AsyncIterator[T]: ... + async def has_next_page(self) -> bool: ... + async def next_page_info(self) -> dict[str, Any]: ... + async def get_next_page(self) -> AsyncPageCursor[T]: ... +``` + +## Usage Examples + +### Auto-Pagination (Simple) + +```python +from anthropic import Anthropic + +client = Anthropic() + +# Automatically iterate over all batches +for batch in client.messages.batches.list(): + print(f"Batch {batch.id}: {batch.processing_status}") +``` + +### Auto-Pagination with Limit + +```python +# Get first 100 batches total (auto-fetching pages) +count = 0 +for batch in client.messages.batches.list(limit=20): + print(batch.id) + count += 1 + if count >= 100: + break +``` + +### Manual Pagination + +```python +# Get first page +page = client.messages.batches.list(limit=10) + +# Process first page +for batch in page.data: + print(f"Batch: {batch.id}") + +# Check if more pages +if page.has_next_page(): + print("More pages available") + + # Get next page info + next_info = page.next_page_info() + print(f"Next page params: {next_info}") + + # Fetch next page + next_page = page.get_next_page() + for batch in next_page.data: + print(f"Next page batch: {batch.id}") +``` + +### Iterate All Pages Manually + +```python +page = client.messages.batches.list(limit=10) + +while True: + # Process current page + for batch in page.data: + print(batch.id) + + # Check for next page + if not page.has_next_page(): + break + + # Fetch next page + page = page.get_next_page() +``` + +### Pagination with Before/After + +```python +# Get batches after specific ID +page = client.messages.batches.list( + after_id="batch_abc123", + limit=20 +) + +for batch in page: + print(batch.id) + +# Get batches before specific ID +page = client.messages.batches.list( + before_id="batch_xyz789", + limit=20 +) + +for batch in page: + print(batch.id) +``` + +### Access Current Page Data + +```python +page = client.messages.batches.list(limit=5) + +# Get items in current page +items = page.data +print(f"Current page has {len(items)} items") + +# Access by index +first_item = page[0] +print(f"First item: {first_item.id}") +``` + +### Async Auto-Pagination + +```python +import asyncio +from anthropic import AsyncAnthropic + +async def main(): + client = AsyncAnthropic() + + # 
Automatically iterate over all batches + async for batch in client.messages.batches.list(): + print(f"Batch {batch.id}: {batch.processing_status}") + +asyncio.run(main()) +``` + +### Async Manual Pagination + +```python +async def paginate_manually(): + client = AsyncAnthropic() + + # Get first page + page = await client.messages.batches.list(limit=10) + + # Process first page + for batch in page.data: + print(batch.id) + + # Check and fetch next page + if await page.has_next_page(): + next_page = await page.get_next_page() + for batch in next_page.data: + print(batch.id) + +asyncio.run(paginate_manually()) +``` + +### Async Iterate All Pages + +```python +async def iterate_all_pages(): + client = AsyncAnthropic() + + page = await client.messages.batches.list(limit=10) + + while True: + for batch in page.data: + print(batch.id) + + if not await page.has_next_page(): + break + + page = await page.get_next_page() + +asyncio.run(iterate_all_pages()) +``` + +### Pagination with Processing + +```python +# Collect all batches with filtering +completed_batches = [] + +for batch in client.messages.batches.list(): + if batch.processing_status == "ended": + completed_batches.append(batch) + +print(f"Found {len(completed_batches)} completed batches") +``` + +### Limit Total Items + +```python +# Get exactly 50 items total +items = [] +for batch in client.messages.batches.list(limit=20): + items.append(batch) + if len(items) >= 50: + break + +print(f"Collected {len(items)} items") +``` + +### Paginate with Error Handling + +```python +from anthropic import APIError + +try: + for batch in client.messages.batches.list(): + print(batch.id) +except APIError as e: + print(f"Pagination error: {e}") +``` + +### Concurrent Page Fetching + +```python +import asyncio + +async def fetch_multiple_pages(): + client = AsyncAnthropic() + + # Fetch first page + page1 = await client.messages.batches.list(limit=10) + + # Fetch multiple subsequent pages concurrently + if await page1.has_next_page(): + next_info = await page1.next_page_info() + + pages = await asyncio.gather( + page1.get_next_page(), + client.messages.batches.list(**next_info, limit=10), + ) + + for page in pages: + for batch in page.data: + print(batch.id) + +asyncio.run(fetch_multiple_pages()) +``` + +### List Models with Pagination + +```python +# Auto-paginate through all models +for model in client.models.list(): + print(f"{model.id}: {model.display_name}") +``` + +### Count Items with Pagination + +```python +# Count total items +total = 0 +for batch in client.messages.batches.list(): + total += 1 + +print(f"Total batches: {total}") +``` + +### Pagination Performance + +```python +import time + +start = time.time() + +# Efficient pagination with larger page size +count = 0 +for batch in client.messages.batches.list(limit=100): + count += 1 + +elapsed = time.time() - start +print(f"Processed {count} items in {elapsed:.2f} seconds") +``` + +### Page Metadata + +```python +page = client.messages.batches.list(limit=10) + +print(f"Items in page: {len(page.data)}") +print(f"Has next: {page.has_next_page()}") + +if page.has_next_page(): + next_info = page.next_page_info() + print(f"Next page info: {next_info}") +``` + +### Reverse Pagination + +```python +# Get most recent items first (default) +for batch in client.messages.batches.list(limit=10): + print(f"Recent: {batch.id} - {batch.created_at}") + +# Get older items using before_id +oldest_on_page = None +for batch in page.data: + oldest_on_page = batch.id + +if oldest_on_page: + older_page = 
client.messages.batches.list(before_id=oldest_on_page, limit=10) + for batch in older_page: + print(f"Older: {batch.id}") +``` + +### Collect All Items + +```python +# Collect all items into list +all_batches = [] +for batch in client.messages.batches.list(): + all_batches.append(batch) + +print(f"Total batches: {len(all_batches)}") +``` + +### Batch Processing Pages + +```python +def process_batch(batches): + """Process a batch of items.""" + for batch in batches: + print(f"Processing {batch.id}") + +page = client.messages.batches.list(limit=50) +while True: + process_batch(page.data) + + if not page.has_next_page(): + break + + page = page.get_next_page() +``` + +### Custom Page Size + +```python +# Use small pages for frequent updates +for batch in client.messages.batches.list(limit=5): + print(batch.id) + +# Use large pages for bulk processing +for batch in client.messages.batches.list(limit=100): + print(batch.id) +``` + +## JSONL Stream Decoders + +For batch results that return JSONL streams: + +```python { .api } +from typing import Generic, TypeVar, Iterator, AsyncIterator + +T = TypeVar('T') + +class JSONLDecoder(Generic[T]): + """ + Synchronous JSONL stream decoder. + + Decodes newline-delimited JSON objects from streaming responses, + commonly used for batch result streaming. + + Yields: + Decoded objects of type T, one per JSONL line + """ + + def __iter__(self) -> Iterator[T]: + """Iterate over decoded JSONL objects.""" + ... + +class AsyncJSONLDecoder(Generic[T]): + """ + Asynchronous JSONL stream decoder. + + Async version for decoding JSONL streams. + + Yields: + Decoded objects of type T, one per JSONL line + """ + + def __aiter__(self) -> AsyncIterator[T]: + """Async iterate over decoded JSONL objects.""" + ... +``` + +### JSONL Decoder Usage + +```python +# Stream batch results (returns JSONL decoder) +results = client.messages.batches.results("batch_abc123") + +# Iterate over individual results +for result in results: + if result.result.type == "succeeded": + print(f"Message: {result.result.message.content[0].text}") + elif result.result.type == "errored": + print(f"Error: {result.result.error.message}") +``` + +### Async JSONL Decoding + +```python +import asyncio + +async def process_batch_results(): + client = AsyncAnthropic() + + # Get async JSONL decoder + results = await client.messages.batches.results("batch_abc123") + + # Async iterate + async for result in results: + print(f"Custom ID: {result.custom_id}") + # Process result + +asyncio.run(process_batch_results()) +``` + +## See Also + +- [Messages API](../api/messages.md) - Message pagination +- [Batches API](../api/batches.md) - Batch pagination and JSONL results +- [Models API](../api/models.md) - Model pagination +- [Type System](./types.md) - Pagination type definitions diff --git a/.tessl/tiles/tessl/pypi-anthropic/docs/reference/types.md b/.tessl/tiles/tessl/pypi-anthropic/docs/reference/types.md new file mode 100644 index 0000000..f1848d5 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-anthropic/docs/reference/types.md @@ -0,0 +1,1413 @@ +# Type System Reference + +Comprehensive type definitions for all request parameters and response objects using Pydantic models and TypedDict. All types are fully typed for static analysis and IDE support. + +## Core Message Types + +### Message + +```python { .api } +class Message(BaseModel): + """ + Complete message response from Claude. 
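+    Text output is available from the content list, e.g.
+    message.content[0].text for a single text block.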
+ + Attributes: + id: Unique message identifier (starts with "msg_") + type: Always "message" + role: Always "assistant" + content: List of content blocks (text, tool_use, etc.) + model: Model identifier used for generation + stop_reason: Why generation stopped + stop_sequence: Stop sequence that triggered completion (if any) + usage: Token usage statistics + """ + id: str + type: Literal["message"] + role: Literal["assistant"] + content: list[ContentBlock] + model: str + stop_reason: StopReason | None + stop_sequence: str | None + usage: Usage +``` + +### MessageParam + +```python { .api } +class MessageParam(TypedDict): + """ + User or assistant message in conversation. + + Fields: + role: "user" or "assistant" + content: String or list of content blocks + """ + role: Literal["user", "assistant"] + content: str | list[ContentBlockParam] +``` + +### ContentBlock (Response) + +```python { .api } +ContentBlock = Union[TextBlock, ToolUseBlock] + +class TextBlock(BaseModel): + """ + Text content in assistant response. + + Attributes: + type: Always "text" + text: The text content + """ + type: Literal["text"] + text: str + +class ToolUseBlock(BaseModel): + """ + Tool invocation in assistant response. + + Attributes: + type: Always "tool_use" + id: Unique tool call identifier + name: Tool name + input: Tool input parameters as dict + """ + type: Literal["tool_use"] + id: str + name: str + input: dict[str, Any] +``` + +### ContentBlockParam (Request) + +```python { .api } +ContentBlockParam = Union[ + TextBlockParam, + ImageBlockParam, + DocumentBlockParam, + ToolUseBlockParam, + ToolResultBlockParam, +] + +class TextBlockParam(TypedDict): + """ + Text content in message. + + Fields: + type: Always "text" + text: The text content + cache_control: Optional cache control + """ + type: Literal["text"] + text: str + cache_control: NotRequired[CacheControlEphemeral] + +class ImageBlockParam(TypedDict): + """ + Image content in message. + + Fields: + type: Always "image" + source: Image source (base64 or URL) + cache_control: Optional cache control + """ + type: Literal["image"] + source: Base64ImageSource | URLImageSource + cache_control: NotRequired[CacheControlEphemeral] + +class DocumentBlockParam(TypedDict): + """ + Document content (PDF, text) in message. + + Fields: + type: Always "document" + source: Document source (base64 or URL) + cache_control: Optional cache control + """ + type: Literal["document"] + source: Base64PDFSource | URLPDFSource | PlainTextSource + cache_control: NotRequired[CacheControlEphemeral] + +class ToolUseBlockParam(TypedDict): + """ + Tool invocation in assistant message (for conversation history). + + Fields: + type: Always "tool_use" + id: Tool call identifier + name: Tool name + input: Tool input parameters + cache_control: Optional cache control + """ + type: Literal["tool_use"] + id: str + name: str + input: dict[str, Any] + cache_control: NotRequired[CacheControlEphemeral] + +class ToolResultBlockParam(TypedDict): + """ + Tool result in user message. 
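+    Included in a user message after executing the tool call identified by
+    tool_use_id, so the model can continue with the tool's output.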
+ + Fields: + type: Always "tool_result" + tool_use_id: ID of tool invocation this is result for + content: String or list of content blocks + is_error: Whether result represents an error + cache_control: Optional cache control + """ + type: Literal["tool_result"] + tool_use_id: str + content: NotRequired[str | list[TextBlockParam | ImageBlockParam]] + is_error: NotRequired[bool] + cache_control: NotRequired[CacheControlEphemeral] +``` + +## Content Source Types + +### Image Sources + +```python { .api } +class Base64ImageSource(TypedDict): + """ + Base64-encoded image. + + Fields: + type: Always "base64" + media_type: MIME type (jpeg, png, gif, webp) + data: Base64-encoded image data (without data URL prefix) + """ + type: Literal["base64"] + media_type: Literal["image/jpeg", "image/png", "image/gif", "image/webp"] + data: str + +class URLImageSource(TypedDict): + """ + Image from URL. + + Fields: + type: Always "url" + url: Image URL (must be publicly accessible) + """ + type: Literal["url"] + url: str +``` + +### Document Sources + +```python { .api } +class Base64PDFSource(TypedDict): + """ + Base64-encoded PDF document. + + Fields: + type: Always "base64" + media_type: Always "application/pdf" + data: Base64-encoded PDF data + """ + type: Literal["base64"] + media_type: Literal["application/pdf"] + data: str + +class URLPDFSource(TypedDict): + """ + PDF document from URL. + + Fields: + type: Always "url" + media_type: Always "application/pdf" + url: PDF URL + """ + type: Literal["url"] + media_type: Literal["application/pdf"] + url: str + +class PlainTextSource(TypedDict): + """ + Plain text document. + + Fields: + type: Always "text" + media_type: Always "text/plain" + data: Plain text content + """ + type: Literal["text"] + media_type: Literal["text/plain"] + data: str +``` + +## Tool Types + +### Tool Definition + +```python { .api } +class Tool(BaseModel): + """ + Tool definition in response. + + Attributes: + name: Tool name + description: Tool description + input_schema: JSON Schema for parameters + """ + name: str + description: str + input_schema: dict[str, Any] + +class ToolParam(TypedDict): + """ + Tool definition in request. + + Fields: + name: Tool name (alphanumeric + underscores) + description: What the tool does + input_schema: JSON Schema for tool parameters + cache_control: Optional cache control + """ + name: str + description: str + input_schema: dict[str, Any] + cache_control: NotRequired[CacheControlEphemeral] +``` + +### Tool Choice + +```python { .api } +ToolChoice = Union[ + ToolChoiceAuto, + ToolChoiceAny, + ToolChoiceNone, + ToolChoiceTool, +] + +class ToolChoiceAuto(TypedDict): + """ + Let Claude decide whether to use tools (default). + + Fields: + type: Always "auto" + disable_parallel_tool_use: Disable parallel tool calls + """ + type: Literal["auto"] + disable_parallel_tool_use: NotRequired[bool] + +class ToolChoiceAny(TypedDict): + """ + Force Claude to use at least one tool. + + Fields: + type: Always "any" + disable_parallel_tool_use: Disable parallel tool calls + """ + type: Literal["any"] + disable_parallel_tool_use: NotRequired[bool] + +class ToolChoiceNone(TypedDict): + """ + Disable all tool use. + + Fields: + type: Always "none" + """ + type: Literal["none"] + +class ToolChoiceTool(TypedDict): + """ + Force Claude to use specific tool. 
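+    The named tool must be one of the tools supplied in the same request.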
+ + Fields: + type: Always "tool" + name: Tool name to use + disable_parallel_tool_use: Disable parallel tool calls + """ + type: Literal["tool"] + name: str + disable_parallel_tool_use: NotRequired[bool] +``` + +## Configuration Types + +### Usage + +```python { .api } +class Usage(BaseModel): + """ + Token usage statistics. + + Attributes: + input_tokens: Number of input tokens + output_tokens: Number of output tokens + cache_creation_input_tokens: Tokens used to create cache (if using prompt caching) + cache_read_input_tokens: Tokens read from cache (if using prompt caching) + """ + input_tokens: int + output_tokens: int + cache_creation_input_tokens: int | None = None + cache_read_input_tokens: int | None = None +``` + +### Metadata + +```python { .api } +class Metadata(BaseModel): + """ + Response metadata. + + Attributes: + user_id: End-user identifier (if provided) + """ + user_id: str | None = None + +class MetadataParam(TypedDict, total=False): + """ + Request metadata for tracking and compliance. + + Fields: + user_id: End-user identifier for tracking and abuse prevention + """ + user_id: str +``` + +### Cache Control + +```python { .api } +class CacheControlEphemeral(TypedDict): + """ + Ephemeral cache control for prompt caching. + + Fields: + type: Always "ephemeral" + """ + type: Literal["ephemeral"] +``` + +### Thinking Configuration + +```python { .api } +ThinkingConfigParam = Union[ThinkingConfigEnabled, ThinkingConfigDisabled] + +class ThinkingConfigEnabled(TypedDict): + """ + Enable extended thinking for enhanced reasoning. + + Fields: + type: Always "enabled" + """ + type: Literal["enabled"] + +class ThinkingConfigDisabled(TypedDict): + """ + Disable extended thinking. + + Fields: + type: Always "disabled" + """ + type: Literal["disabled"] +``` + +### Stop Reason + +```python { .api } +StopReason = Literal[ + "end_turn", # Natural completion + "max_tokens", # Hit max_tokens limit + "stop_sequence", # Hit custom stop sequence + "tool_use", # Model wants to use a tool +] +``` + +## Token Counting Types + +```python { .api } +class MessageTokensCount(BaseModel): + """ + Token count response. + + Attributes: + input_tokens: Number of input tokens + """ + input_tokens: int +``` + +## Batch Types + +### Batch + +```python { .api } +class MessageBatch(BaseModel): + """ + Batch metadata and status. + + Attributes: + id: Unique batch identifier (starts with "msgbatch_") + type: Always "message_batch" + processing_status: Current processing status + request_counts: Counts by result type + ended_at: When batch completed (ISO 8601) + created_at: When batch was created (ISO 8601) + expires_at: When batch results will expire (ISO 8601) + cancel_initiated_at: When cancellation was initiated (ISO 8601) + results_url: URL to download results JSONL + """ + id: str + type: Literal["message_batch"] + processing_status: Literal["in_progress", "canceling", "ended"] + request_counts: MessageBatchRequestCounts + ended_at: str | None + created_at: str + expires_at: str + cancel_initiated_at: str | None + results_url: str | None + +class MessageBatchRequestCounts(BaseModel): + """ + Request count statistics in batch. + + Attributes: + processing: Requests currently processing + succeeded: Successful requests + errored: Failed requests + canceled: Canceled requests + expired: Expired requests + """ + processing: int + succeeded: int + errored: int + canceled: int + expired: int + +class DeletedMessageBatch(BaseModel): + """ + Deleted batch confirmation. 
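+    Returned by the batch delete operation to confirm removal.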
+ + Attributes: + id: Batch identifier that was deleted + type: Always "message_batch_deleted" + """ + id: str + type: Literal["message_batch_deleted"] +``` + +### Batch Request/Response + +```python { .api } +class MessageBatchIndividualRequest(TypedDict): + """ + Individual request in batch. + + Fields: + custom_id: Client-provided unique identifier + params: Message creation parameters + """ + custom_id: str + params: MessageCreateParams + +class MessageBatchIndividualResponse(BaseModel): + """ + Individual response in batch results. + + Attributes: + custom_id: Client-provided identifier from request + result: Result object (success/error/canceled/expired) + """ + custom_id: str + result: MessageBatchResult + +MessageBatchResult = Union[ + MessageBatchSucceededResult, + MessageBatchErroredResult, + MessageBatchCanceledResult, + MessageBatchExpiredResult, +] + +class MessageBatchSucceededResult(BaseModel): + """ + Successful batch result. + + Attributes: + type: Always "succeeded" + message: The message response + """ + type: Literal["succeeded"] + message: Message + +class MessageBatchErroredResult(BaseModel): + """ + Failed batch result. + + Attributes: + type: Always "errored" + error: Error details + """ + type: Literal["errored"] + error: ErrorObject + +class MessageBatchCanceledResult(BaseModel): + """ + Canceled batch result. + + Attributes: + type: Always "canceled" + """ + type: Literal["canceled"] + +class MessageBatchExpiredResult(BaseModel): + """ + Expired batch result. + + Attributes: + type: Always "expired" + """ + type: Literal["expired"] +``` + +## Model Types + +```python { .api } +class ModelInfo(BaseModel): + """ + Model information and capabilities. + + Attributes: + id: Model identifier + type: Always "model" + display_name: Human-readable model name + created_at: When model was created (ISO 8601) + """ + id: str + type: Literal["model"] + display_name: str + created_at: str + +Model = Literal[ + "claude-opus-4-5-20250929", + "claude-sonnet-4-5-20250929", + "claude-3-5-sonnet-20241022", + "claude-3-5-sonnet-20240620", + "claude-3-5-haiku-20241022", + "claude-3-opus-20240229", + "claude-3-sonnet-20240229", + "claude-3-haiku-20240307", + # Additional models... +] +``` + +## Error Types + +```python { .api } +class ErrorObject(BaseModel): + """ + Error object in API responses. + + Attributes: + type: Error type identifier + message: Human-readable error message + """ + type: str + message: str + +class ErrorResponse(BaseModel): + """ + Error response wrapper. + + Attributes: + type: Always "error" + error: Error details + """ + type: Literal["error"] + error: APIErrorObject + +class APIErrorObject(BaseModel): + """ + API error object. + + Attributes: + type: Error type + message: Error message + """ + type: str + message: str +``` + +## Stream Event Types + +### Message Stream Events + +```python { .api } +MessageStreamEvent = Union[ + MessageStartEvent, + MessageDeltaEvent, + MessageStopEvent, + ContentBlockStartEvent, + ContentBlockDeltaEvent, + ContentBlockStopEvent, +] + +class MessageStartEvent(BaseModel): + """ + Stream started with initial message. + + Attributes: + type: Always "message_start" + message: Initial message with empty content + """ + type: Literal["message_start"] + message: Message + +class MessageDeltaEvent(BaseModel): + """ + Message metadata changed. 
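+    Emitted near the end of a stream, carrying the final stop_reason and the
+    cumulative output token count.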
+ + Attributes: + type: Always "message_delta" + delta: Changed fields + usage: Token usage update + """ + type: Literal["message_delta"] + delta: MessageDelta + usage: MessageDeltaUsage + +class MessageDelta(BaseModel): + """ + Message field changes. + + Attributes: + stop_reason: Updated stop reason + stop_sequence: Stop sequence that triggered + """ + stop_reason: StopReason | None + stop_sequence: str | None + +class MessageDeltaUsage(BaseModel): + """ + Token usage in stream delta. + + Attributes: + output_tokens: Output tokens generated so far + """ + output_tokens: int + +class MessageStopEvent(BaseModel): + """ + Stream completed. + + Attributes: + type: Always "message_stop" + """ + type: Literal["message_stop"] +``` + +### Content Block Stream Events + +```python { .api } +class ContentBlockStartEvent(BaseModel): + """ + New content block started. + + Attributes: + type: Always "content_block_start" + index: Content block index + content_block: Initial content block + """ + type: Literal["content_block_start"] + index: int + content_block: ContentBlock + +class ContentBlockDeltaEvent(BaseModel): + """ + Content block received delta. + + Attributes: + type: Always "content_block_delta" + index: Content block index + delta: Delta content + """ + type: Literal["content_block_delta"] + index: int + delta: ContentBlockDelta + +ContentBlockDelta = Union[TextDelta, InputJSONDelta] + +class TextDelta(BaseModel): + """ + Text content delta. + + Attributes: + type: Always "text_delta" + text: Incremental text + """ + type: Literal["text_delta"] + text: str + +class InputJSONDelta(BaseModel): + """ + Tool input JSON delta. + + Attributes: + type: Always "input_json_delta" + partial_json: Incremental JSON string + """ + type: Literal["input_json_delta"] + partial_json: str + +class ContentBlockStopEvent(BaseModel): + """ + Content block completed. + + Attributes: + type: Always "content_block_stop" + index: Content block index + """ + type: Literal["content_block_stop"] + index: int +``` + +### Raw Stream Events + +```python { .api } +RawMessageStreamEvent = Union[ + RawMessageStartEvent, + RawMessageDeltaEvent, + RawMessageStopEvent, + RawContentBlockStartEvent, + RawContentBlockDeltaEvent, + RawContentBlockStopEvent, +] + +class RawMessageStartEvent(BaseModel): + """ + Raw message start event from SSE. + + Attributes: + type: Always "message_start" + message: Initial message + """ + type: Literal["message_start"] + message: Message + +class RawMessageDeltaEvent(BaseModel): + """ + Raw message delta event from SSE. + + Attributes: + type: Always "message_delta" + delta: Changed fields as dict + usage: Token usage update + """ + type: Literal["message_delta"] + delta: dict[str, Any] + usage: MessageDeltaUsage + +class RawMessageStopEvent(BaseModel): + """ + Raw message stop event from SSE. + + Attributes: + type: Always "message_stop" + """ + type: Literal["message_stop"] + +class RawContentBlockStartEvent(BaseModel): + """ + Raw content block start from SSE. + + Attributes: + type: Always "content_block_start" + index: Content block index + content_block: Initial content block as dict + """ + type: Literal["content_block_start"] + index: int + content_block: dict[str, Any] + +class RawContentBlockDeltaEvent(BaseModel): + """ + Raw content block delta from SSE. 
+ + Attributes: + type: Always "content_block_delta" + index: Content block index + delta: Delta content as dict + """ + type: Literal["content_block_delta"] + index: int + delta: dict[str, Any] + +class RawContentBlockStopEvent(BaseModel): + """ + Raw content block stop from SSE. + + Attributes: + type: Always "content_block_stop" + index: Content block index + """ + type: Literal["content_block_stop"] + index: int +``` + +## Pagination Types + +### Synchronous Pagination + +```python { .api } +class SyncPage(Generic[T]): + """ + Synchronous ID-based pagination. + + Attributes: + data: Items in current page + """ + data: list[T] + + def __iter__(self) -> Iterator[T]: + """Iterate over all items across pages automatically.""" + ... + + def __getitem__(self, index: int) -> T: + """Get item by index in current page.""" + ... + + def has_next_page(self) -> bool: + """Check if another page exists.""" + ... + + def next_page_info(self) -> dict[str, Any]: + """Get pagination parameters for next page.""" + ... + + def get_next_page(self) -> SyncPage[T]: + """Fetch next page.""" + ... + +class SyncTokenPage(Generic[T]): + """ + Synchronous token-based pagination. + + Uses continuation tokens instead of IDs. + + Attributes: + data: Items in current page + """ + data: list[T] + + def __iter__(self) -> Iterator[T]: ... + def has_next_page(self) -> bool: ... + def next_page_info(self) -> dict[str, Any]: ... + def get_next_page(self) -> SyncTokenPage[T]: ... + +class SyncPageCursor(Generic[T]): + """ + Synchronous cursor-based pagination. + + Uses cursors for pagination. + + Attributes: + data: Items in current page + """ + data: list[T] + + def __iter__(self) -> Iterator[T]: ... + def has_next_page(self) -> bool: ... + def next_page_info(self) -> dict[str, Any]: ... + def get_next_page(self) -> SyncPageCursor[T]: ... +``` + +### Asynchronous Pagination + +```python { .api } +class AsyncPage(Generic[T]): + """ + Asynchronous ID-based pagination. + + Attributes: + data: Items in current page + """ + data: list[T] + + def __aiter__(self) -> AsyncIterator[T]: + """Async iterate over all items across pages.""" + ... + + def __getitem__(self, index: int) -> T: + """Get item by index in current page.""" + ... + + async def has_next_page(self) -> bool: + """Check if another page exists.""" + ... + + async def next_page_info(self) -> dict[str, Any]: + """Get pagination parameters for next page.""" + ... + + async def get_next_page(self) -> AsyncPage[T]: + """Fetch next page.""" + ... + +class AsyncTokenPage(Generic[T]): + """ + Asynchronous token-based pagination. + + Attributes: + data: Items in current page + """ + data: list[T] + + def __aiter__(self) -> AsyncIterator[T]: ... + async def has_next_page(self) -> bool: ... + async def next_page_info(self) -> dict[str, Any]: ... + async def get_next_page(self) -> AsyncTokenPage[T]: ... + +class AsyncPageCursor(Generic[T]): + """ + Asynchronous cursor-based pagination. + + Attributes: + data: Items in current page + """ + data: list[T] + + def __aiter__(self) -> AsyncIterator[T]: ... + async def has_next_page(self) -> bool: ... + async def next_page_info(self) -> dict[str, Any]: ... + async def get_next_page(self) -> AsyncPageCursor[T]: ... +``` + +## Type Helpers + +```python { .api } +class NOT_GIVEN: + """ + Sentinel value for omitted optional parameters. + + Used to distinguish between explicitly passing None vs not passing a parameter. 
+ This allows the SDK to differentiate between: + - Parameter not provided (NOT_GIVEN) + - Parameter explicitly set to None (None) + """ + ... + +NotGiven = type[NOT_GIVEN] + +def not_given() -> NOT_GIVEN: + """Return NOT_GIVEN sentinel.""" + ... + +class Omit: + """ + Type for omitted fields. + + Used internally for partial updates where some fields should be omitted. + """ + ... + +def omit() -> Omit: + """Return Omit sentinel.""" + ... + +NoneType = type[None] +``` + +## Request Parameter Types + +```python { .api } +class MessageCreateParams(TypedDict): + """ + Complete parameters for message creation. + + Fields: + model: Model identifier (required) + messages: Conversation messages (required) + max_tokens: Maximum tokens to generate (required) + system: System prompt (optional) + metadata: Request metadata (optional) + stop_sequences: Stop sequences (optional) + stream: Enable streaming (optional) + temperature: Sampling temperature 0.0-1.0 (optional) + top_p: Nucleus sampling (optional) + top_k: Top-k sampling (optional) + tools: Available tools (optional) + tool_choice: Tool selection control (optional) + service_tier: Service tier selection (optional) + thinking: Extended thinking configuration (optional) + """ + model: str + messages: list[MessageParam] + max_tokens: int + system: NotRequired[str | list[TextBlockParam]] + metadata: NotRequired[MetadataParam] + stop_sequences: NotRequired[list[str]] + stream: NotRequired[bool] + temperature: NotRequired[float] + top_p: NotRequired[float] + top_k: NotRequired[int] + tools: NotRequired[list[ToolParam]] + tool_choice: NotRequired[ToolChoice] + service_tier: NotRequired[Literal["auto", "standard_only"]] + thinking: NotRequired[ThinkingConfigParam] + +class RequestOptions(TypedDict, total=False): + """ + Per-request options. + + Fields: + headers: Custom headers + max_retries: Maximum retry attempts + timeout: Request timeout + query: Custom query parameters + """ + headers: dict[str, str] + max_retries: int + timeout: float | httpx.Timeout + query: dict[str, object] +``` + +## Response Wrapper Types + +```python { .api } +class APIResponse(Generic[T]): + """ + HTTP response wrapper with parsed data. + + Attributes: + data: Parsed response data + headers: Response headers + status_code: HTTP status code + request: Original request + """ + data: T + headers: dict[str, str] + status_code: int + request: httpx.Request + + def parse(self) -> T: + """ + Get parsed response data. + + Returns: + Parsed response object of type T + """ + ... + +class AsyncAPIResponse(Generic[T]): + """ + Async HTTP response wrapper. + + Attributes: + data: Parsed response data + headers: Response headers + status_code: HTTP status code + request: Original request + """ + data: T + headers: dict[str, str] + status_code: int + request: httpx.Request + + async def parse(self) -> T: + """ + Get parsed response data asynchronously. + + Returns: + Parsed response object of type T + """ + ... +``` + +## Base Model + +```python { .api } +class BaseModel(pydantic.BaseModel): + """ + Base Pydantic model for all response types. 
+ + Provides: + - JSON serialization/deserialization + - Type validation + - Field access + - Model copying + - Schema generation + """ + def model_dump( + self, + *, + mode: Literal["json", "python"] = "python", + include: set[str] | None = None, + exclude: set[str] | None = None, + by_alias: bool = False, + exclude_unset: bool = False, + exclude_defaults: bool = False, + exclude_none: bool = False, + ) -> dict[str, Any]: + """ + Convert model to dictionary. + + Parameters: + mode: Serialization mode ("json" or "python") + include: Fields to include + exclude: Fields to exclude + by_alias: Use field aliases + exclude_unset: Exclude fields that weren't set + exclude_defaults: Exclude fields with default values + exclude_none: Exclude fields with None values + + Returns: + Dictionary representation of model + """ + ... + + def model_dump_json( + self, + *, + indent: int | None = None, + include: set[str] | None = None, + exclude: set[str] | None = None, + by_alias: bool = False, + exclude_unset: bool = False, + exclude_defaults: bool = False, + exclude_none: bool = False, + ) -> str: + """ + Convert model to JSON string. + + Parameters: + indent: JSON indentation + include: Fields to include + exclude: Fields to exclude + by_alias: Use field aliases + exclude_unset: Exclude fields that weren't set + exclude_defaults: Exclude fields with default values + exclude_none: Exclude fields with None values + + Returns: + JSON string representation + """ + ... + + @classmethod + def model_validate(cls, obj: Any) -> Self: + """ + Validate and parse object into model. + + Parameters: + obj: Object to validate (dict, model instance, etc.) + + Returns: + Validated model instance + + Raises: + ValidationError: If validation fails + """ + ... + + @classmethod + def model_validate_json(cls, json_data: str | bytes) -> Self: + """ + Validate and parse JSON into model. + + Parameters: + json_data: JSON string or bytes + + Returns: + Validated model instance + + Raises: + ValidationError: If validation fails + """ + ... + + def model_copy( + self, + *, + update: dict[str, Any] | None = None, + deep: bool = False, + ) -> Self: + """ + Create copy of model with optional updates. + + Parameters: + update: Dictionary of fields to update + deep: Whether to make deep copy + + Returns: + New model instance + """ + ... + + @classmethod + def model_json_schema( + cls, + *, + by_alias: bool = True, + ref_template: str = "#/$defs/{model}", + ) -> dict[str, Any]: + """ + Generate JSON Schema for model. + + Parameters: + by_alias: Use field aliases in schema + ref_template: Template for $ref values + + Returns: + JSON Schema dictionary + """ + ... +``` + +## SDK Infrastructure Types + +### HTTP and Client Types + +```python { .api } +from typing import Union +import httpx + +Timeout = Union[float, httpx.Timeout, None] +""" +Timeout specification for HTTP requests. + +Can be: +- float: Total timeout in seconds +- httpx.Timeout: Granular timeout with connect/read/write/pool settings +- None: No timeout (not recommended) +""" + +Transport = Union[httpx.HTTPTransport, httpx.AsyncHTTPTransport] +"""HTTP transport for custom connection pooling and proxying.""" + +ProxiesTypes = Union[str, httpx.Proxy, dict[str, Union[str, httpx.Proxy]]] +""" +Proxy configuration types. + +Can be: +- str: Proxy URL +- httpx.Proxy: Configured proxy object +- dict: Mapping of protocols to proxy URLs +""" + +FileTypes = tuple[str, bytes, str] +""" +File tuple for upload operations. 
+ +Format: (filename, content, mime_type) + +Example: + ("document.pdf", pdf_bytes, "application/pdf") +""" +``` + +### Sentinel Types + +```python { .api } +class NotGivenType: + """Singleton type for NOT_GIVEN sentinel value.""" + ... + +NotGiven = NotGivenType +""" +Sentinel value indicating a parameter was not provided. + +Used to distinguish between None (explicitly passed) and +parameter not passed at all. + +Example: + def create(*, param: str | None | NotGiven = NOT_GIVEN): + if param is NOT_GIVEN: + # Parameter not provided + elif param is None: + # Parameter explicitly set to None +""" + +NOT_GIVEN: NotGiven +"""Singleton instance of NotGiven sentinel.""" +``` + +### Response Wrapper Types + +```python { .api } +from typing import Generic, TypeVar + +T = TypeVar('T') + +class APIResponse(Generic[T]): + """ + Wrapper providing access to raw HTTP response and parsed data. + + Attributes: + http_response: Raw httpx.Response object + data: Cached parsed response data + """ + http_response: httpx.Response + + def parse(self) -> T: + """ + Parse and return typed response data. + + Returns: + Parsed response object of type T + """ + ... + +class AsyncAPIResponse(Generic[T]): + """ + Async version of APIResponse. + + Attributes: + http_response: Raw httpx.Response object + data: Cached parsed response data + """ + http_response: httpx.Response + + async def parse(self) -> T: + """ + Async parse and return typed response data. + + Returns: + Parsed response object of type T + """ + ... +``` + +### Base Model + +```python { .api } +from pydantic import BaseModel as PydanticBaseModel + +class BaseModel(PydanticBaseModel): + """ + Base Pydantic model with SDK-specific configuration. + + All response types inherit from this class, providing: + - JSON serialization via .model_dump() and .model_dump_json() + - Type validation + - Field aliases and exclusions + """ + + def model_dump(self, **kwargs) -> dict[str, Any]: + """Serialize model to dictionary.""" + ... + + def model_dump_json(self, **kwargs) -> str: + """Serialize model to JSON string.""" + ... + + @classmethod + def model_validate(cls, obj: Any) -> Self: + """Validate and construct model from dict.""" + ... + + @classmethod + def model_json_schema(cls, **kwargs) -> dict[str, Any]: + """Generate JSON Schema for model.""" + ... +``` + +## See Also + +- [Messages API](../api/messages.md) - Core message types in use +- [Streaming API](../api/streaming.md) - Stream event types in use +- [Tool Use API](../api/tools.md) - Tool types in use +- [Batches API](../api/batches.md) - Batch types in use +- [Error Handling](./errors.md) - Error types and exception hierarchy +- [Client Configuration](./client-config.md) - Usage of Timeout and Transport types +- [Utilities](./utilities.md) - Usage of FileTypes diff --git a/.tessl/tiles/tessl/pypi-anthropic/docs/reference/utilities.md b/.tessl/tiles/tessl/pypi-anthropic/docs/reference/utilities.md new file mode 100644 index 0000000..dde1fc6 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-anthropic/docs/reference/utilities.md @@ -0,0 +1,169 @@ +# Utilities Reference + +Helper functions for file handling, schema transformation, and HTTP client customization. + +## File Utilities + +```python { .api } +def file_from_path(path: str | Path) -> FileTypes: + """ + Load single file from filesystem path. 
+ + Parameters: + path: File path (string or Path object) + + Returns: + File-like object for API requests (tuple of filename, content, mime-type) + + Raises: + FileNotFoundError: If path does not exist + IsADirectoryError: If path is a directory + """ + ... + +def files_from_dir(directory: str | os.PathLike[str]) -> list[FileTypes]: + """ + Load all files from a directory recursively. + + Recursively walks through directory and loads all files, returning them + as a list suitable for batch file upload operations. + + Parameters: + directory: Directory path (string or PathLike object) + + Returns: + List of file objects, each as (filename, content, mime-type) tuple + + Raises: + FileNotFoundError: If directory does not exist + NotADirectoryError: If path is not a directory + """ + ... + +async def async_files_from_dir(directory: str | os.PathLike[str]) -> list[FileTypes]: + """ + Async version of files_from_dir(). + + Asynchronously loads all files from directory recursively. Useful for + large directories or when loading many files in async context. + + Parameters: + directory: Directory path (string or PathLike object) + + Returns: + List of file objects, each as (filename, content, mime-type) tuple + + Raises: + FileNotFoundError: If directory does not exist + NotADirectoryError: If path is not a directory + """ + ... +``` + +## Schema Transformation + +```python { .api } +def transform_schema(schema: dict[str, Any]) -> dict[str, Any]: + """ + Transform JSON schema for structured outputs. + + Parameters: + schema: JSON Schema dictionary + + Returns: + Transformed schema for API + """ + ... +``` + +## HTTP Client Factories + +```python { .api } +class DefaultHttpxClient: + """Default synchronous HTTP client.""" + def __init__( + self, + *, + proxy: str | httpx.Proxy | None = None, + transport: httpx.HTTPTransport | None = None, + **kwargs + ): ... + +class DefaultAsyncHttpxClient: + """Default asynchronous HTTP client.""" + def __init__( + self, + *, + proxy: str | httpx.Proxy | None = None, + transport: httpx.AsyncHTTPTransport | None = None, + **kwargs + ): ... + +class DefaultAioHttpClient: + """Alternative async HTTP client using aiohttp.""" + def __init__(self, **kwargs): ... 
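+
+# Illustrative usage (a sketch, assuming the client constructors accept an
+# `http_client` argument, as in Anthropic(http_client=...)):
+#
+#   client = Anthropic(http_client=DefaultHttpxClient(proxy="http://localhost:8080"))
+#   async_client = AsyncAnthropic(http_client=DefaultAsyncHttpxClient())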
+``` + +## Quick Examples + +### Load Single File + +```python +from anthropic._utils import file_from_path + +file = file_from_path("document.pdf") +uploaded = client.beta.files.upload(file, purpose="batch") +``` + +### Load Multiple Files from Directory + +```python +from anthropic._utils import files_from_dir + +# Load all files from a directory +files = files_from_dir("./documents") +print(f"Loaded {len(files)} files") + +# Upload all files for batch processing +for file in files: + uploaded = client.beta.files.upload(file, purpose="batch") + print(f"Uploaded: {uploaded.id}") +``` + +### Async Load Files from Directory + +```python +import asyncio +from anthropic._utils import async_files_from_dir + +async def upload_directory(): + client = AsyncAnthropic() + + # Asynchronously load all files + files = await async_files_from_dir("./documents") + + # Upload concurrently + tasks = [client.beta.files.upload(f, purpose="batch") for f in files] + results = await asyncio.gather(*tasks) + + print(f"Uploaded {len(results)} files") + +asyncio.run(upload_directory()) +``` + +### Transform Schema + +```python +from pydantic import BaseModel +from anthropic.lib._parse._transform import transform_schema + +class Response(BaseModel): + answer: str + confidence: float + +schema = transform_schema(Response.model_json_schema()) +``` + +## See Also + +- [Client Configuration](./client-config.md) - Client initialization diff --git a/.tessl/tiles/tessl/pypi-anthropic/tile.json b/.tessl/tiles/tessl/pypi-anthropic/tile.json new file mode 100644 index 0000000..e86f85e --- /dev/null +++ b/.tessl/tiles/tessl/pypi-anthropic/tile.json @@ -0,0 +1,8 @@ +{ + "name": "tessl/pypi-anthropic", + "version": "0.75.0", + "docs": "docs/index.md", + "describes": "pkg:pypi/anthropic@0.75.0", + "summary": "The official Python library for the anthropic API", + "private": false +} \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-jax/docs/core-transformations.md b/.tessl/tiles/tessl/pypi-jax/docs/core-transformations.md new file mode 100644 index 0000000..f405673 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-jax/docs/core-transformations.md @@ -0,0 +1,560 @@ +# Core Program Transformations + +JAX's core strength lies in its composable function transformations that enable automatic differentiation, just-in-time compilation, vectorization, and parallelization. These transformations can be arbitrarily composed and applied to pure Python functions. + +## Capabilities + +### Just-in-Time Compilation + +Compiles functions to optimized XLA code for improved performance on CPUs, GPUs, and TPUs. JIT compilation happens lazily on first call and caches compiled functions. + +```python { .api } +def jit( + fun: Callable, + in_shardings=None, + out_shardings=None, + static_argnums=None, + static_argnames=None, + donate_argnums=None, + donate_argnames=None, + keep_unused=False, + device=None, + backend=None, + inline=False, + abstracted_axes=None +) -> Callable: + """ + Just-in-time compile a function for improved performance. 
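+    Static arguments are baked into the compiled code, so a new value for a
+    static argument triggers recompilation.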
+ + Args: + fun: Function to JIT compile + in_shardings: How inputs should be sharded across devices + out_shardings: How outputs should be sharded across devices + static_argnums: Tuple of argument indices to treat as static + static_argnames: Tuple of keyword argument names to treat as static + donate_argnums: Tuple of argument indices to donate (reuse memory) + donate_argnames: Tuple of keyword argument names to donate + keep_unused: Whether to keep unused arguments in compiled function + device: Device to place computation on + backend: Backend to use for compilation + inline: Whether to inline the function + abstracted_axes: Axes to abstract for shape polymorphism + + Returns: + JIT-compiled function with same signature as input + """ +``` + +Usage example: +```python +@jax.jit +def fast_computation(x, y): + return jnp.sum(x ** 2 + y ** 2) + +# Or with static arguments +@jax.jit(static_argnums=(1,)) +def dynamic_slice(x, size): + return x[:size] +``` + +### Automatic Differentiation + +Compute gradients of scalar-valued functions using reverse-mode automatic differentiation (backpropagation). + +```python { .api } +def grad( + fun: Callable, + argnums: int | Sequence[int] = 0, + has_aux: bool = False, + holomorphic: bool = False, + allow_int: bool = False, + reduce_axes: Sequence[int] = () +) -> Callable: + """ + Create function that computes gradient of scalar-valued function. + + Args: + fun: Function to differentiate (must return scalar) + argnums: Argument number(s) to differentiate with respect to + has_aux: Whether function returns auxiliary data (value, aux) + holomorphic: Whether function is holomorphic (complex differentiable) + allow_int: Whether to allow integer inputs + reduce_axes: Axes to reduce over when function output is not scalar + + Returns: + Function that computes gradient with respect to specified arguments + """ + +def value_and_grad( + fun: Callable, + argnums: int | Sequence[int] = 0, + has_aux: bool = False, + holomorphic: bool = False, + allow_int: bool = False, + reduce_axes: Sequence[int] = () +) -> Callable: + """ + Create function that computes both value and gradient. + + Args: + fun: Function to differentiate + argnums: Argument number(s) to differentiate with respect to + has_aux: Whether function returns auxiliary data + holomorphic: Whether function is holomorphic + allow_int: Whether to allow integer inputs + reduce_axes: Axes to reduce over when function output is not scalar + + Returns: + Function that returns (value, gradient) tuple + """ +``` + +Usage examples: +```python +def loss_fn(params, x, y): + predictions = params[0] * x + params[1] + return jnp.mean((predictions - y) ** 2) + +# Gradient function +grad_fn = jax.grad(loss_fn) +grads = grad_fn(params, x, y) + +# Value and gradient together +val_grad_fn = jax.value_and_grad(loss_fn) +loss_val, grads = val_grad_fn(params, x, y) + +# Gradient with respect to multiple arguments +multi_grad_fn = jax.grad(loss_fn, argnums=(0, 1, 2)) +param_grads, x_grads, y_grads = multi_grad_fn(params, x, y) +``` + +### Jacobian Computation + +Compute full Jacobian matrices using forward-mode or reverse-mode differentiation. + +```python { .api } +def jacobian( + fun: Callable, + argnums: int | Sequence[int] = 0, + has_aux: bool = False, + holomorphic: bool = False, + allow_int: bool = False +) -> Callable: + """ + Create function that computes Jacobian matrix. 
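+    Uses reverse-mode AD by default; see jacfwd and jacrev below to choose
+    the mode explicitly based on the Jacobian's shape.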
+ + Args: + fun: Function to compute Jacobian of + argnums: Argument number(s) to differentiate with respect to + has_aux: Whether function returns auxiliary data + holomorphic: Whether function is holomorphic + allow_int: Whether to allow integer inputs + + Returns: + Function that returns Jacobian matrix + """ + +def jacfwd( + fun: Callable, + argnums: int | Sequence[int] = 0, + has_aux: bool = False, + holomorphic: bool = False +) -> Callable: + """ + Jacobian using forward-mode AD (efficient for tall Jacobians). + + Args: + fun: Function to differentiate + argnums: Argument number(s) to differentiate with respect to + has_aux: Whether function returns auxiliary data + holomorphic: Whether function is holomorphic + + Returns: + Function that computes Jacobian using forward-mode AD + """ + +def jacrev( + fun: Callable, + argnums: int | Sequence[int] = 0, + has_aux: bool = False, + holomorphic: bool = False +) -> Callable: + """ + Jacobian using reverse-mode AD (efficient for wide Jacobians). + + Args: + fun: Function to differentiate + argnums: Argument number(s) to differentiate with respect to + has_aux: Whether function returns auxiliary data + holomorphic: Whether function is holomorphic + + Returns: + Function that computes Jacobian using reverse-mode AD + """ + +def hessian( + fun: Callable, + argnums: int | Sequence[int] = 0, + has_aux: bool = False, + holomorphic: bool = False +) -> Callable: + """ + Create function that computes Hessian matrix (second derivatives). + + Args: + fun: Scalar-valued function to compute Hessian of + argnums: Argument number(s) to differentiate with respect to + has_aux: Whether function returns auxiliary data + holomorphic: Whether function is holomorphic + + Returns: + Function that returns Hessian matrix + """ +``` + +### Forward and Reverse Mode Primitives + +Lower-level differentiation primitives for building custom transformations. + +```python { .api } +def jvp( + fun: Callable, + primals: Sequence, + tangents: Sequence +) -> tuple: + """ + Jacobian-vector product using forward-mode AD. + + Args: + fun: Function to differentiate + primals: Point at which to evaluate function + tangents: Tangent vectors to multiply Jacobian by + + Returns: + Tuple of (primals_out, tangents_out) + """ + +def vjp( + fun: Callable, + *primals +) -> tuple: + """ + Vector-Jacobian product using reverse-mode AD. + + Args: + fun: Function to differentiate + primals: Point at which to evaluate function + + Returns: + Tuple of (primals_out, vjp_fun) where vjp_fun computes VJP + """ + +def linearize(fun: Callable, *primals) -> tuple: + """ + Linearize function around given point. + + Args: + fun: Function to linearize + primals: Point to linearize around + + Returns: + Tuple of (primals_out, jvp_fun) for computing JVPs + """ +``` + +### Vectorization + +Transform functions to work on batches of inputs by adding a batch dimension and vectorizing over it. + +```python { .api } +def vmap( + fun: Callable, + in_axes=0, + out_axes=0, + axis_name=None, + axis_size=None, + spmd_axis_name=None +) -> Callable: + """ + Vectorizing map that adds batch dimension to function. + + Args: + fun: Function to vectorize + in_axes: How to map over input arguments (int, None, or tuple) + out_axes: How to map over output values (int, None, or tuple) + axis_name: Name for the mapped axis (for use with psum etc.) 
+ axis_size: Size of mapped axis (for use with axis_name) + spmd_axis_name: SPMD axis name for multi-device computation + + Returns: + Vectorized function that works on batches + """ +``` + +Usage examples: +```python +# Vectorize over first axis of both inputs +batch_fn = jax.vmap(single_example_fn) +batch_outputs = batch_fn(batch_inputs) + +# Vectorize with different input axes +# x has batch dim 0, y has batch dim 1 +fn = jax.vmap(process_fn, in_axes=(0, 1)) + +# Vectorize with no batch dim for some inputs +# x has batch dim 0, y is broadcast to all batch elements +fn = jax.vmap(process_fn, in_axes=(0, None)) +``` + +### Parallelization + +Distribute computation across multiple devices using SPMD (Single Program, Multiple Data) parallelism. + +```python { .api } +def pmap( + fun: Callable, + axis_name=None, + in_axes=0, + out_axes=0, + static_broadcasted_argnums=(), + devices=None, + backend=None, + axis_size=None, + donate_argnums=(), + global_arg_shapes=None +) -> Callable: + """ + Parallel map that distributes computation across multiple devices. + + Args: + fun: Function to parallelize + axis_name: Name for the parallel axis + in_axes: How to split inputs across devices + out_axes: How to collect outputs from devices + static_broadcasted_argnums: Arguments to broadcast to all devices + devices: Explicit device placement + backend: Backend to use + axis_size: Size of parallel axis + donate_argnums: Arguments to donate memory + global_arg_shapes: Global shapes for arguments + + Returns: + Function that runs in parallel across devices + """ +``` + +Usage example: +```python +# Function runs on each device with its slice of data +parallel_fn = jax.pmap(single_device_fn) +# Input shape: (num_devices, per_device_batch_size, ...) +outputs = parallel_fn(distributed_inputs) +``` + +### Memory-Efficient Gradient Computation + +Trade computation for memory using gradient checkpointing (rematerialization). + +```python { .api } +def checkpoint( + fun: Callable, + *, + concrete: bool = False, + policy: Callable = None, + prevent_cse: bool = True, + static_argnums: int | Sequence[int] = () +) -> Callable: + """ + Gradient checkpointing for memory-efficient backpropagation. + + Args: + fun: Function to apply checkpointing to + concrete: Whether to use concrete checkpointing + policy: Policy for deciding what to checkpoint + prevent_cse: Whether to prevent common subexpression elimination + static_argnums: Arguments to treat as static + + Returns: + Checkpointed function that saves memory during backward pass + """ + +# Alias for checkpoint +remat = checkpoint +``` + +Usage example: +```python +@jax.checkpoint +def expensive_layer(x, params): + # Expensive computation that will be recomputed during backprop + return jnp.tanh(x @ params) + +# Use in gradient computation to save memory +grad_fn = jax.grad(lambda params: loss(checkpoint_layer(x, params))) +``` + +### Custom Derivatives + +Define custom forward and backward passes for functions. + +```python { .api } +def custom_gradient(fun: Callable) -> Callable: + """ + Decorator to define custom gradient for function. + + The decorated function should return (primal_out, grad_fn) where + grad_fn(cotangents) -> tangents. + + Args: + fun: Function with custom gradient implementation + + Returns: + Function with custom gradient behavior + """ + +def custom_jvp(fun: Callable) -> Callable: + """ + Decorator to define custom JVP (forward-mode derivative) rule. 
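+    Register the rule with the decorated function's defjvp method.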
+ + Args: + fun: Function to define custom JVP for + + Returns: + Function with custom JVP behavior + """ + +def custom_vjp(fun: Callable) -> Callable: + """ + Decorator to define custom VJP (reverse-mode derivative) rule. + + Args: + fun: Function to define custom VJP for + + Returns: + Function with custom VJP behavior + """ +``` + +### Advanced Differentiation + +Additional differentiation utilities and transformations. + +```python { .api } +def stop_gradient(x) -> Array: + """ + Stop gradient computation at this point. + + Args: + x: Array to stop gradient for + + Returns: + Array with gradient flow stopped + """ + +def fwd_and_bwd( + fun: Callable, + *primals, + **kwargs +) -> tuple: + """ + Compute forward and backward passes separately. + + Args: + fun: Function to compute forward/backward for + primals: Input values + + Returns: + Tuple of (primal_out, vjp_fun) + """ + +def closure_convert( + fun: Callable, + *closed_over_vals +) -> tuple: + """ + Convert function with closure variables for differentiation. + + Args: + fun: Function with closure variables + closed_over_vals: Values closed over by function + + Returns: + Converted function and closure values + """ + +def pure_callback( + callback: Callable, + result_shape_dtypes, + *args, + sharding=None, + vmap_method=None, + **kwargs +) -> Any: + """ + Call host function with pure side effects from JAX computation. + + Args: + callback: Pure host function to call + result_shape_dtypes: Shape and dtype of callback result + args: Arguments to pass to callback + sharding: Sharding specification for result + vmap_method: How to handle vectorization + kwargs: Additional keyword arguments + + Returns: + Result of callback with specified shape and dtype + """ + +def effects_barrier() -> None: + """ + Create synchronization barrier for side effects. + + Ensures all preceding computations with side effects complete + before continuing with subsequent computations. + """ + +def named_call(f: Callable, *, name: str) -> Callable: + """ + Wrap function with a name for debugging and profiling. + + Args: + f: Function to wrap + name: Name to associate with function calls + + Returns: + Wrapped function that appears with given name in traces + """ + +def named_scope(name: str): + """ + Context manager for named scopes in JAX computations. + + Args: + name: Name for the computation scope + + Usage: + with jax.named_scope("layer1"): + output = layer_computation(input) + """ +``` + +## Transformation Composition + +JAX transformations can be arbitrarily composed for powerful effects: + +```python +# JIT-compiled gradient +fast_grad = jax.jit(jax.grad(loss_fn)) + +# Vectorized gradient (per-example gradients) +batch_grad = jax.vmap(jax.grad(loss_fn), in_axes=(None, 0, 0)) + +# Parallel gradient computation +parallel_grad = jax.pmap(jax.grad(loss_fn)) + +# Second derivatives (Hessian-vector product) +hvp = lambda v: jax.jvp(jax.grad(loss_fn), (params,), (v,))[1] + +# Gradient of gradient (for meta-learning) +meta_grad = jax.grad(lambda meta_params: loss_fn(update_fn(meta_params))) +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-jax/docs/device-memory.md b/.tessl/tiles/tessl/pypi-jax/docs/device-memory.md new file mode 100644 index 0000000..ca0628b --- /dev/null +++ b/.tessl/tiles/tessl/pypi-jax/docs/device-memory.md @@ -0,0 +1,594 @@ +# Device and Memory Management + +JAX provides comprehensive device management and distributed computing capabilities, enabling efficient use of CPUs, GPUs, and TPUs. 
This includes device placement, memory management, sharding for multi-device computation, and distributed array operations. + +## Core Imports + +```python +import jax +from jax import devices, device_put, make_mesh +from jax.sharding import NamedSharding, PartitionSpec as P +``` + +## Capabilities + +### Device Discovery and Information + +Query available devices and their properties for computation placement and resource management. + +```python { .api } +def devices(backend=None) -> list[Device]: + """ + Get list of all available devices. + + Args: + backend: Optional backend name ('cpu', 'gpu', 'tpu') + + Returns: + List of available Device objects + """ + +def local_devices(process_index=None, backend=None) -> list[Device]: + """ + Get list of devices local to current process. + + Args: + process_index: Process index (None for current process) + backend: Optional backend name + + Returns: + List of local Device objects + """ + +def device_count(backend=None) -> int: + """ + Get total number of devices across all processes. + + Args: + backend: Optional backend name + + Returns: + Total device count + """ + +def local_device_count(backend=None) -> int: + """ + Get number of devices on current process. + + Args: + backend: Optional backend name + + Returns: + Local device count + """ + +def host_count(backend=None) -> int: + """ + Get number of hosts in distributed computation. + + Args: + backend: Optional backend name + + Returns: + Host count + """ + +def host_id(backend=None) -> int: + """ + Get ID of current host. + + Args: + backend: Optional backend name + + Returns: + Current host ID + """ + +def host_ids(backend=None) -> list[int]: + """ + Get list of all host IDs. + + Args: + backend: Optional backend name + + Returns: + List of host IDs + """ + +def process_count(backend=None) -> int: + """ + Get number of processes in distributed computation. + + Args: + backend: Optional backend name + + Returns: + Process count + """ + +def process_index(backend=None) -> int: + """ + Get index of current process. + + Args: + backend: Optional backend name + + Returns: + Current process index + """ + +def process_indices(backend=None) -> list[int]: + """ + Get list of all process indices. + + Args: + backend: Optional backend name + + Returns: + List of process indices + """ + +def default_backend() -> str: + """ + Get name of default backend. + + Returns: + Default backend name string + """ +``` + +### Device Placement and Data Movement + +Control where computations run and move data between devices and host memory. + +```python { .api } +def device_put(x, device=None, src=None) -> Array: + """ + Move array to specified device. + + Args: + x: Array or array-like object to move + device: Target device (None for default device) + src: Source device for the transfer + + Returns: + Array placed on target device + """ + +def device_put_sharded( + sharded_values: list, + devices: list[Device], + indices=None +) -> Array: + """ + Create sharded array from per-device values. + + Args: + sharded_values: List of arrays, one per device + devices: List of target devices + indices: Optional sharding indices + + Returns: + Distributed array sharded across devices + """ + +def device_put_replicated(x, devices: list[Device]) -> Array: + """ + Replicate array across multiple devices. + + Args: + x: Array to replicate + devices: List of target devices + + Returns: + Array replicated across all specified devices + """ + +def device_get(x) -> Any: + """ + Move array from device to host memory as NumPy array. 
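+    Blocks until the device computation producing the array has completed.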
+ + Args: + x: Array to move to host + + Returns: + NumPy array in host memory + """ + +def copy_to_host_async(x) -> Any: + """ + Asynchronously copy array to host memory. + + Args: + x: Array to copy + + Returns: + Future-like object for async copy + """ + +def block_until_ready(x) -> Array: + """ + Block until array computation is complete and ready. + + Args: + x: Array to wait for + + Returns: + The same array, guaranteed to be ready + """ +``` + +Usage examples: +```python +# Check available devices +all_devices = jax.devices() +print(f"Available devices: {all_devices}") +print(f"Device count: {jax.device_count()}") + +# Move data to specific device +cpu_data = jnp.array([1, 2, 3, 4]) +if jax.devices('gpu'): + gpu_data = jax.device_put(cpu_data, jax.devices('gpu')[0]) + print(f"Data is on: {gpu_data.device()}") + +# Move back to host +host_data = jax.device_get(gpu_data) # Returns NumPy array + +# Explicit device placement in computations +with jax.default_device(jax.devices('cpu')[0]): + cpu_result = jnp.sum(jnp.array([1, 2, 3])) +``` + +### Sharding and Distributed Arrays + +Define how arrays are distributed across multiple devices for parallel computation. + +```python { .api } +class NamedSharding: + """ + Sharding specification using named mesh axes. + + Defines how arrays are partitioned across devices using logical axis names. + """ + + def __init__(self, mesh, spec): + """ + Create named sharding specification. + + Args: + mesh: Device mesh with named axes + spec: Partition specification (PartitionSpec) + """ + self.mesh = mesh + self.spec = spec + +class PartitionSpec: + """ + Specification for how to partition array dimensions across mesh axes. + + Use P(axis_names...) to create partition specifications. + """ + pass + +# Alias for PartitionSpec +P = PartitionSpec + +def make_mesh(mesh_shape, axis_names) -> Mesh: + """ + Create device mesh for distributed computation. + + Args: + mesh_shape: Shape of device mesh (tuple of integers) + axis_names: Names for mesh axes (tuple of strings) + + Returns: + Mesh object representing device layout + """ + +class Mesh: + """Device mesh for distributed computation.""" + devices: Array # Device array in mesh shape + axis_names: tuple[str, ...] # Names of mesh axes + + @property + def shape(self) -> dict[str, int]: + """Dictionary mapping axis names to sizes.""" + + @property + def size(self) -> int: + """Total number of devices in mesh.""" + +def make_array_from_single_device_arrays( + arrays: list[Array], + sharding: Sharding +) -> Array: + """ + Create distributed array from per-device arrays. + + Args: + arrays: List of arrays on different devices + sharding: Sharding specification + + Returns: + Distributed array with specified sharding + """ + +def make_array_from_callback( + shape: tuple[int, ...], + sharding: Sharding, + data_callback: Callable +) -> Array: + """ + Create distributed array using callback function. + + Args: + shape: Global array shape + sharding: Sharding specification + data_callback: Function to generate data for each shard + + Returns: + Distributed array created from callback + """ + +def make_array_from_process_local_data( + sharding: Sharding, + local_data: Array +) -> Array: + """ + Create distributed array from process-local data. + + Args: + sharding: Sharding specification + local_data: Data local to current process + + Returns: + Distributed array assembled from local data + """ +``` + +### Sharded Computation + +Execute computations on sharded arrays with explicit control over parallelization. 
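Besides `shard_map` (listed below), sharding hints can also be attached to intermediates inside jitted code with `with_sharding_constraint`. A minimal sketch, assuming at least two addressable devices (it degrades to a single-device mesh otherwise):

```python
import jax
import jax.numpy as jnp
from jax.sharding import NamedSharding, PartitionSpec as P

# Use up to two devices; a 1-device mesh still runs, just without real sharding.
n = 2 if jax.device_count() >= 2 else 1
mesh = jax.make_mesh((n,), ('data',))
row_sharding = NamedSharding(mesh, P('data'))

@jax.jit
def normalize(x):
    # Hint to the compiler that this intermediate should stay row-sharded.
    x = jax.lax.with_sharding_constraint(x, row_sharding)
    return x / jnp.linalg.norm(x, axis=-1, keepdims=True)

x = jax.device_put(jnp.ones((8, 4)), row_sharding)
print(normalize(x).sharding)
```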
+ +```python { .api } +def shard_map( + f: Callable, + mesh: Mesh, + in_specs, + out_specs, + check_rep=True +) -> Callable: + """ + Transform function to operate on sharded arrays. + + Args: + f: Function to transform + mesh: Device mesh for computation + in_specs: Input sharding specifications + out_specs: Output sharding specifications + check_rep: Whether to check for replication consistency + + Returns: + Function that operates on globally sharded arrays + """ + +# Alias for shard_map +smap = shard_map + +def with_sharding_constraint(x, sharding) -> Array: + """ + Add sharding constraint to array. + + Args: + x: Input array + sharding: Desired sharding specification + + Returns: + Array with sharding constraint applied + """ +``` + +Usage examples: +```python +# Create 2x2 device mesh +devices_array = jnp.array(jax.devices()[:4]).reshape(2, 2) +mesh = jax.make_mesh((2, 2), ('data', 'model')) + +# Define sharding specifications +data_sharding = NamedSharding(mesh, P('data', None)) # Shard first axis across 'data' +model_sharding = NamedSharding(mesh, P(None, 'model')) # Shard second axis across 'model' +replicated_sharding = NamedSharding(mesh, P()) # Replicated across all devices + +# Create sharded arrays +x = jax.random.normal(jax.random.key(0), (8, 4)) +x_sharded = jax.device_put(x, data_sharding) + +weights = jax.random.normal(jax.random.key(1), (4, 8)) +weights_sharded = jax.device_put(weights, model_sharding) + +# Computation with sharded arrays automatically parallelized +@jax.jit +def matmul_fn(x, w): + return x @ w + +result = matmul_fn(x_sharded, weights_sharded) # Automatically sharded computation + +# Explicit sharding control +def single_device_fn(x_shard, w_shard): + return x_shard @ w_shard + +parallel_fn = jax.shard_map( + single_device_fn, + mesh=mesh, + in_specs=(P('data', None), P(None, 'model')), + out_specs=P('data', 'model') +) + +result = parallel_fn(x_sharded, weights_sharded) +``` + +### Memory Management + +Control memory usage and optimize performance through explicit memory management. + +```python { .api } +def live_arrays() -> list[Array]: + """ + Get list of arrays currently alive in memory. + + Returns: + List of live Array objects + """ + +def clear_caches() -> None: + """ + Clear JAX's internal caches to free memory. + + Clears JIT compilation cache, device buffer cache, and other internal caches. + """ +``` + +### Configuration and Backend Management + +Configure device behavior and backend selection. + +```python { .api } +# Configuration through jax.config +jax.config.update('jax_platform_name', 'cpu') # Force CPU backend +jax.config.update('jax_platform_name', 'gpu') # Force GPU backend +jax.config.update('jax_platform_name', 'tpu') # Force TPU backend + +# Transfer guards to catch unintentional device transfers +jax.config.update('jax_transfer_guard', 'allow') # Default: allow all transfers +jax.config.update('jax_transfer_guard', 'log') # Log transfers +jax.config.update('jax_transfer_guard', 'disallow') # Disallow transfers +jax.config.update('jax_transfer_guard', 'log_explicit_device_put') # Log explicit transfers + +# Default device configuration +jax.config.update('jax_default_device', jax.devices('gpu')[0]) # Set default device +``` + +### Array and Device Properties + +Inspect array placement and device properties. 
+ +```python { .api } +# Array device methods +array.device() -> Device # Get device containing array +array.devices() -> set[Device] # Get all devices for distributed array +array.sharding -> Sharding # Get array's sharding specification +array.is_fully_replicated -> bool # Check if array is replicated +array.is_fully_addressable -> bool # Check if array is fully addressable + +# Device properties +class Device: + """Device object representing compute accelerator.""" + + platform: str # Platform name ('cpu', 'gpu', 'tpu') + device_kind: str # Device kind string + id: int # Device ID within platform + host_id: int # Host ID containing device + process_index: int # Process index containing device + + def __str__(self) -> str: ... + def __repr__(self) -> str: ... +``` + +## Advanced Usage Patterns + +### Multi-Device Training + +```python +# Setup for data-parallel training +def create_train_setup(num_devices): + # Create mesh for data parallelism + mesh = jax.make_mesh((num_devices,), ('batch',)) + + # Sharding specifications + batch_sharding = NamedSharding(mesh, P('batch')) # Batch dimension sharded + replicated_sharding = NamedSharding(mesh, P()) # Parameters replicated + + return mesh, batch_sharding, replicated_sharding + +def distributed_train_step(params, batch, optimizer_state): + # All arrays should already have appropriate sharding + grads = jax.grad(loss_fn)(params, batch) + + # Update step automatically uses sharding from inputs + new_params, new_state = optimizer.update(grads, optimizer_state, params) + return new_params, new_state + +# JIT compile with sharding +distributed_train_step = jax.jit( + distributed_train_step, + in_shardings=(replicated_sharding, batch_sharding, replicated_sharding), + out_shardings=(replicated_sharding, replicated_sharding) +) +``` + +### Model Parallelism + +```python +# Setup for model-parallel computation +def create_model_parallel_setup(): + # 2D mesh: batch x model dimensions + mesh = jax.make_mesh((2, 4), ('batch', 'model')) + + # Different sharding strategies + input_sharding = NamedSharding(mesh, P('batch', None)) + weight_sharding = NamedSharding(mesh, P(None, 'model')) + output_sharding = NamedSharding(mesh, P('batch', 'model')) + + return mesh, input_sharding, weight_sharding, output_sharding + +def model_parallel_layer(x, weights): + # Matrix multiply with different sharding patterns + return x @ weights # JAX handles the communication automatically + +# Shard arrays according to strategy +x = jax.device_put(x, input_sharding) +weights = jax.device_put(weights, weight_sharding) +result = model_parallel_layer(x, weights) # Result has output_sharding +``` + +### Memory-Efficient Inference + +```python +def memory_efficient_inference(model_fn, large_input): + # Process in chunks to manage memory + chunk_size = 1000 + chunks = [large_input[i:i+chunk_size] for i in range(0, len(large_input), chunk_size)] + + results = [] + for chunk in chunks: + # Move to device, compute, move back to host + device_chunk = jax.device_put(chunk) + device_result = model_fn(device_chunk) + host_result = jax.device_get(device_result) + results.append(host_result) + + # Optional: clear caches to free memory + jax.clear_caches() + + return jnp.concatenate(results) +``` + +### Cross-Device Communication Patterns + +```python +# Collective operations using pmap +@jax.pmap +def allreduce_example(x): + # Sum across all devices + return jax.lax.psum(x, axis_name='batch') + +@jax.pmap +def allgather_example(x): + # Gather from all devices + return jax.lax.all_gather(x, 
axis_name='batch') + +# Use with replicated data +replicated_data = jax.device_put_replicated(data, jax.devices()) +summed_result = allreduce_example(replicated_data) +gathered_result = allgather_example(replicated_data) +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-jax/docs/experimental.md b/.tessl/tiles/tessl/pypi-jax/docs/experimental.md new file mode 100644 index 0000000..a8bd90e --- /dev/null +++ b/.tessl/tiles/tessl/pypi-jax/docs/experimental.md @@ -0,0 +1,483 @@ +# Experimental Features + +JAX experimental features provide access to cutting-edge capabilities, performance optimizations, and research functionality through `jax.experimental`. These features may change or be moved to the main JAX API in future versions. + +**Warning**: Experimental APIs may change without notice between JAX versions. Use with caution in production code. + +## Core Imports + +```python +import jax.experimental as jex +from jax.experimental import io_callback, enable_x64 +``` + +## Capabilities + +### Precision Control + +Control floating-point precision globally across JAX computations. + +```python { .api } +def enable_x64(enable: bool = True) -> None: + """ + Enable or disable 64-bit floating point precision. + + Args: + enable: Whether to enable 64-bit precision (default: True) + + Note: + This sets jax_enable_x64 config flag globally + """ + +def disable_x64() -> None: + """ + Disable 64-bit floating point precision. + + Convenience function equivalent to enable_x64(False). + """ +``` + +Usage examples: +```python +# Enable double precision +jax.experimental.enable_x64() +x = jnp.array(1.0) # Now defaults to float64 instead of float32 +print(x.dtype) # dtype('float64') + +# Disable double precision +jax.experimental.disable_x64() +y = jnp.array(1.0) # Back to float32 +print(y.dtype) # dtype('float32') +``` + +### I/O and Callbacks + +Enable host callbacks for I/O operations and side effects within JAX computations. + +```python { .api } +def io_callback( + callback: Callable, + result_shape_dtypes, + *args, + sharding=None, + vmap_method=None, + ordered=False, + **kwargs +) -> Any: + """ + Call host function from within JAX computation with I/O side effects. + + Args: + callback: Host function to call (should be pure except for I/O) + result_shape_dtypes: Shape and dtype specification for callback result + args: Arguments to pass to callback + sharding: Sharding specification for result + vmap_method: How to handle vmapping ('sequential', 'expand_dims', etc.) 
+ ordered: Whether to maintain call ordering across devices + kwargs: Additional keyword arguments for callback + + Returns: + Result of callback with specified shape and dtype + """ +``` + +Usage examples: +```python +# Logging during computation (debugging) +def log_value(x, step): + print(f"Step {step}: value = {x}") + return x + +@jax.jit +def training_step(x, step): + # Log intermediate values during training + x = jax.experimental.io_callback( + log_value, + jax.ShapeDtypeStruct(x.shape, x.dtype), + x, step + ) + return x * 2 + +# File I/O during computation +def save_checkpoint(params, step): + import pickle + with open(f'checkpoint_{step}.pkl', 'wb') as f: + pickle.dump(params, f) + return step + +@jax.jit +def train_with_checkpointing(params, data, step): + # Training computation + loss = compute_loss(params, data) + grads = jax.grad(compute_loss)(params, data) + new_params = update_params(params, grads) + + # Save checkpoint every 100 steps + step = jax.experimental.io_callback( + save_checkpoint, + jax.ShapeDtypeStruct((), jnp.int32), + new_params, step + ) + + return new_params, loss +``` + +### Advanced Differentiation + +Experimental differentiation features and optimizations. + +```python { .api } +def saved_input_vjp(f, *primals) -> tuple[Any, Callable]: + """ + Vector-Jacobian product with saved inputs for memory efficiency. + + Args: + f: Function to differentiate + primals: Input values + + Returns: + Tuple of (primal_out, vjp_fun) where vjp_fun has access to saved inputs + """ + +# Alias for saved_input_vjp +si_vjp = saved_input_vjp +``` + +Usage example: +```python +def expensive_function(x, y): + # Some expensive computation that we want to differentiate + z = jnp.exp(x) + jnp.sin(y) + return jnp.sum(z ** 2) + +# Use saved input VJP for memory efficiency +x, y = jnp.array([1.0, 2.0]), jnp.array([3.0, 4.0]) +primal_out, vjp_fn = jax.experimental.saved_input_vjp(expensive_function, x, y) + +# Compute VJP with cotangent +cotangent = 1.0 +x_grad, y_grad = vjp_fn(cotangent) +``` + +### Extended Array Types + +Experimental array types and extended functionality. + +```python { .api } +class EArray: + """ + Extended array type with additional metadata and functionality. + + Experimental array type that may include additional features + beyond standard JAX arrays. + """ + pass + +class MutableArray: + """ + Experimental mutable array type for specific use cases. + + Warning: Breaks JAX's functional programming model. Use carefully. + """ + pass + +def mutable_array(init_val) -> MutableArray: + """ + Create mutable array from initial value. + + Args: + init_val: Initial array value + + Returns: + MutableArray that can be modified in-place + """ +``` + +### Type System Extensions + +Experimental extensions to JAX's type system. + +```python { .api } +def primal_tangent_dtype(primal_dtype, tangent_dtype=None): + """ + Create dtype for primal-tangent pairs in forward-mode AD. + + Args: + primal_dtype: Data type for primal values + tangent_dtype: Data type for tangent values (defaults to primal_dtype) + + Returns: + Combined dtype for primal-tangent computation + """ +``` + +### Compilation and Performance + +Experimental compilation features and performance optimizations. 
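Whether or not the cache-control helpers listed below are available in a given release, the effect of compilation caching is easy to observe using only stable APIs (`jax.jit`, `Array.block_until_ready`, `jax.clear_caches`): time the first call of a jitted function against a repeat call. A rough sketch:

```python
import time
import jax
import jax.numpy as jnp

@jax.jit
def f(x):
    return jnp.sin(x) @ jnp.cos(x).T

x = jnp.ones((512, 512))

t0 = time.perf_counter()
f(x).block_until_ready()      # first call: traces and compiles
t1 = time.perf_counter()
f(x).block_until_ready()      # second call: reuses the compiled executable
t2 = time.perf_counter()
print(f"compile+run: {t1 - t0:.3f}s, cached run: {t2 - t1:.3f}s")

jax.clear_caches()            # drop cached executables to free memory
```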
+ +```python { .api } +# Compilation control +def disable_jit_cache() -> None: + """Disable JIT compilation cache for debugging.""" + +def enable_jit_cache() -> None: + """Re-enable JIT compilation cache.""" + +# Performance monitoring +def compilation_cache_stats() -> dict: + """Get statistics about JIT compilation cache.""" + +def clear_compilation_cache() -> None: + """Clear JIT compilation cache.""" +``` + +### Hardware-Specific Features + +Experimental features for specific hardware accelerators. + +```python { .api } +# TPU-specific features +class TPUMemoryFraction: + """Control TPU memory usage fraction.""" + +def set_tpu_memory_fraction(fraction: float) -> None: + """ + Set fraction of TPU memory to use. + + Args: + fraction: Memory fraction (0.0 to 1.0) + """ + +# GPU-specific features +def gpu_memory_stats() -> dict: + """Get GPU memory usage statistics.""" + +def set_gpu_memory_growth(enable: bool) -> None: + """ + Enable/disable GPU memory growth. + + Args: + enable: Whether to enable incremental memory allocation + """ +``` + +### Automatic Mixed Precision + +Experimental automatic mixed precision for training acceleration. + +```python { .api } +class AutoMixedPrecision: + """Automatic mixed precision policy for training.""" + + def __init__(self, policy='float16'): + """ + Initialize AMP policy. + + Args: + policy: Precision policy ('float16', 'bfloat16', etc.) + """ + self.policy = policy + + def __call__(self, fn): + """Apply AMP to function.""" + pass + +def amp_policy(policy_name: str) -> AutoMixedPrecision: + """ + Create automatic mixed precision policy. + + Args: + policy_name: Name of precision policy + + Returns: + AMP policy object + """ +``` + +### Distributed Computing Extensions + +Experimental distributed computing features beyond standard pmap/shard_map. + +```python { .api } +def multi_host_utils(): + """Utilities for multi-host distributed computation.""" + pass + +class GlobalDeviceArray: + """ + Experimental global device array for large-scale distributed computation. + + Represents arrays that span multiple hosts in distributed setting. + """ + pass + +def create_global_device_array( + shape, + dtype, + mesh, + partition_spec +) -> GlobalDeviceArray: + """ + Create global device array across distributed system. + + Args: + shape: Global array shape + dtype: Array data type + mesh: Device mesh specification + partition_spec: How to partition array + + Returns: + Global device array + """ +``` + +### Research and Prototype Features + +Cutting-edge research features that may be highly experimental. + +```python { .api } +# Sparsity support +class SparseArray: + """Experimental sparse array support.""" + pass + +def sparse_ops(): + """Sparse operations module (highly experimental).""" + pass + +# Quantization support +def quantized_dot(lhs, rhs, **kwargs): + """Experimental quantized matrix multiplication.""" + pass + +def quantization_utils(): + """Utilities for quantized computation.""" + pass + +# Custom operators +def custom_op_builder(): + """Builder for custom XLA operations.""" + pass + +# Advanced compilation +def ahead_of_time_compile(fn, *args, **kwargs): + """Ahead-of-time compilation (experimental).""" + pass +``` + +### Debugging and Profiling + +Experimental debugging and profiling tools. + +```python { .api } +def debug_callback(callback, *args, **kwargs): + """ + Debug callback that doesn't affect computation graph. 
+ + Args: + callback: Debug function to call + args: Arguments to callback + kwargs: Keyword arguments to callback + """ + +def trace_function(fn): + """ + Trace function execution for debugging. + + Args: + fn: Function to trace + + Returns: + Traced version of function + """ + +def memory_profiler(): + """Memory profiling utilities.""" + pass + +def computation_graph_visualizer(): + """Tools for visualizing computation graphs.""" + pass +``` + +## Migration Patterns + +When experimental features graduate to main JAX API: + +```python +# Old experimental usage +from jax.experimental import feature_name + +# New main API usage (after graduation) +from jax import feature_name + +# Or sometimes moves to different module +from jax.some_module import feature_name +``` + +## Usage Guidelines + +### Best Practices for Experimental Features + +```python +# 1. Version pinning when using experimental features +# requirements.txt: jax==0.7.1 # Pin exact version + +# 2. Graceful fallbacks +try: + from jax.experimental import new_feature + use_experimental = True +except ImportError: + use_experimental = False + +def my_function(x): + if use_experimental: + return new_feature.optimized_op(x) + else: + return traditional_op(x) + +# 3. Feature flags for experimental code +USE_EXPERIMENTAL_AMP = False + +if USE_EXPERIMENTAL_AMP: + amp_policy = jax.experimental.amp_policy('float16') + train_fn = amp_policy(train_fn) + +# 4. Documentation and warnings +def experimental_model_fn(x): + """ + Model function using experimental JAX features. + + Warning: Uses jax.experimental.* APIs that may change. + Tested with JAX v0.7.1. + """ + # Implementation using experimental features + pass +``` + +### Testing Experimental Features + +```python +import pytest + +# Skip tests if experimental feature not available +@pytest.mark.skipif( + not hasattr(jax.experimental, 'new_feature'), + reason="Experimental feature not available" +) +def test_experimental_feature(): + # Test experimental functionality + pass + +# Conditional testing based on JAX version +import jax +jax_version = tuple(map(int, jax.__version__.split('.')[:2])) + +@pytest.mark.skipif( + jax_version < (0, 7), + reason="Feature requires JAX >= 0.7" +) +def test_version_dependent_feature(): + # Test version-dependent experimental feature + pass +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-jax/docs/index.md b/.tessl/tiles/tessl/pypi-jax/docs/index.md new file mode 100644 index 0000000..94db3bc --- /dev/null +++ b/.tessl/tiles/tessl/pypi-jax/docs/index.md @@ -0,0 +1,316 @@ +# JAX + +JAX is a NumPy-compatible library that provides composable transformations of Python+NumPy programs: differentiate, compile, and transform Numpy code. JAX brings together a powerful ecosystem of program transformations including automatic differentiation (grad), just-in-time compilation (jit), vectorization (vmap), and parallelization (pmap) with support for CPUs, GPUs, and TPUs. 
+ +## Package Information + +- **Package Name**: jax +- **Language**: Python +- **Installation**: `pip install jax[cpu]` (CPU) or `pip install jax[cuda12]` (GPU) + +## Core Imports + +```python +import jax +import jax.numpy as jnp +from jax import grad, jit, vmap, pmap +``` + +Import specific transformations: + +```python +from jax import ( + grad, jit, vmap, pmap, jacfwd, jacrev, + hessian, value_and_grad, checkpoint +) +``` + +Import array types and devices: + +```python +from jax import Array, Device +import jax.numpy as jnp +import jax.random as jr +import jax.lax as lax +import jax.scipy as jsp +import jax.nn as jnn +import jax.tree as tree +``` + +## Basic Usage + +```python +import jax +import jax.numpy as jnp +from jax import grad, jit, vmap + +# NumPy-compatible arrays and operations +x = jnp.array([1.0, 2.0, 3.0, 4.0]) +y = jnp.sum(x ** 2) # JAX arrays work like NumPy + +# Automatic differentiation +def loss_fn(params, x, y): + pred = params[0] * x + params[1] + return jnp.mean((pred - y) ** 2) + +# Compute gradient of loss function +grad_fn = grad(loss_fn) +params = jnp.array([0.5, 0.1]) +gradients = grad_fn(params, x, y) + +# Just-in-time compilation for performance +@jit +def fast_function(x): + return jnp.sum(x ** 2) + jnp.sin(x).sum() + +result = fast_function(x) + +# Vectorization across batch dimension +@vmap +def process_batch(single_input): + return single_input ** 2 + jnp.sin(single_input) + +batch_data = jnp.array([[1, 2], [3, 4], [5, 6]]) +batch_result = process_batch(batch_data) + +# Random number generation +key = jax.random.key(42) +random_data = jax.random.normal(key, (10, 5)) + +# Device management +print(f"Available devices: {jax.devices()}") +array_on_gpu = jax.device_put(x, jax.devices()[0]) +``` + +## Architecture + +JAX's power comes from its composable function transformations that can be applied to pure Python functions: + +- **Pure Functions**: JAX transformations require functions to be functionally pure (no side effects) +- **Function Transformations**: grad, jit, vmap, pmap can be arbitrarily composed +- **XLA Compilation**: Just-in-time compilation to optimized accelerator code +- **Array Programming**: NumPy-compatible array operations with immutable semantics +- **Device Model**: Transparent execution across CPU, GPU, and TPU with explicit device management + +The composability enables powerful patterns like `jit(grad(loss_fn))` or `vmap(grad(per_example_loss))`. + +## Capabilities + +### Core Program Transformations + +The fundamental JAX transformations that enable automatic differentiation, compilation, vectorization, and parallelization. These transformations are the core of JAX's power and can be arbitrarily composed. + +```python { .api } +def jit(fun: Callable, **kwargs) -> Callable: ... +def grad(fun: Callable, argnums: int | Sequence[int] = 0, **kwargs) -> Callable: ... +def vmap(fun: Callable, in_axes=0, out_axes=0, **kwargs) -> Callable: ... +def pmap(fun: Callable, axis_name=None, **kwargs) -> Callable: ... +def value_and_grad(fun: Callable, argnums: int | Sequence[int] = 0, **kwargs) -> Callable: ... +``` + +[Core Transformations](./core-transformations.md) + +### NumPy Compatibility API + +Complete NumPy-compatible array operations including creation, manipulation, mathematical functions, linear algebra, and reductions. JAX arrays are immutable and support the full NumPy API with added benefits of JIT compilation and automatic differentiation. 
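One practical difference from NumPy is worth calling out before the API listing: JAX arrays are immutable, so in-place item assignment is replaced by the functional `.at[...]` update syntax.

```python
import jax.numpy as jnp

x = jnp.arange(5)

# x[0] = 10 would raise an error: JAX arrays do not support item assignment.
y = x.at[0].set(10)      # returns a new array with index 0 replaced
z = x.at[1:3].add(100)   # sliced updates work the same way

print(x)  # [0 1 2 3 4]   (original unchanged)
print(y)  # [10  1  2  3  4]
print(z)  # [  0 101 102   3   4]
```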
+ +```python { .api } +# Array creation +def array(object, dtype=None, **kwargs) -> Array: ... +def zeros(shape, dtype=None) -> Array: ... +def ones(shape, dtype=None) -> Array: ... +def arange(start, stop=None, step=None, dtype=None) -> Array: ... + +# Mathematical operations +def sum(a, axis=None, **kwargs) -> Array: ... +def mean(a, axis=None, **kwargs) -> Array: ... +def dot(a, b) -> Array: ... +def matmul(x1, x2) -> Array: ... +``` + +[NumPy Compatibility](./numpy-compatibility.md) + +### Neural Network Functions + +Activation functions, initializers, and neural network utilities commonly used in machine learning. Includes all standard activations like ReLU, sigmoid, softmax, and modern variants like GELU, Swish, and attention mechanisms. + +```python { .api } +def relu(x) -> Array: ... +def sigmoid(x) -> Array: ... +def softmax(x, axis=-1) -> Array: ... +def gelu(x, approximate=True) -> Array: ... +def silu(x) -> Array: ... +def one_hot(x, num_classes, **kwargs) -> Array: ... +def dot_product_attention(query, key, value, **kwargs) -> Array: ... +``` + +[Neural Networks](./neural-networks.md) + +### Random Number Generation + +Functional pseudo-random number generation with explicit key management. JAX uses a functional approach to random numbers that enables reproducibility, parallelization, and vectorization. + +```python { .api } +def key(seed: int) -> Array: ... +def split(key: Array, num: int = 2) -> Array: ... +def normal(key: Array, shape=(), dtype=float) -> Array: ... +def uniform(key: Array, shape=(), minval=0.0, maxval=1.0) -> Array: ... +def categorical(key: Array, logits, **kwargs) -> Array: ... +def choice(key: Array, a, **kwargs) -> Array: ... +``` + +[Random Numbers](./random-numbers.md) + +### Low-Level Operations + +Direct XLA operations and primitives for high-performance computing. These provide the building blocks for JAX's higher-level operations and enable custom operations and optimizations. + +```python { .api } +def add(x, y) -> Array: ... +def mul(x, y) -> Array: ... +def dot_general(lhs, rhs, dimension_numbers, **kwargs) -> Array: ... +def conv_general_dilated(lhs, rhs, **kwargs) -> Array: ... +def reduce_sum(operand, axes) -> Array: ... +def cond(pred, true_fun, false_fun, *operands) -> Any: ... +def while_loop(cond_fun, body_fun, init_val) -> Any: ... +def scan(f, init, xs, **kwargs) -> tuple[Any, Array]: ... +``` + +[Low-Level Operations](./low-level-ops.md) + +### SciPy Compatibility + +SciPy-compatible functions for scientific computing including linear algebra, signal processing, special functions, statistics, and sparse operations. Provides a familiar interface for scientific Python users. + +```python { .api } +# Linear algebra (jax.scipy.linalg) +def solve(a, b) -> Array: ... +def eig(a, **kwargs) -> tuple[Array, Array]: ... +def svd(a, **kwargs) -> tuple[Array, Array, Array]: ... + +# Special functions (jax.scipy.special) +def logsumexp(a, **kwargs) -> Array: ... +def erf(x) -> Array: ... +def gamma(x) -> Array: ... + +# Statistics (jax.scipy.stats) +def norm.pdf(x, loc=0, scale=1) -> Array: ... +def multivariate_normal.pdf(x, mean, cov) -> Array: ... +``` + +[SciPy Compatibility](./scipy-compatibility.md) + +### Tree Operations + +Utilities for working with PyTrees (nested Python structures containing arrays). Essential for handling complex data structures in functional programming patterns and neural network parameters. + +```python { .api } +def tree_map(f, tree, *rest) -> Any: ... +def tree_reduce(function, tree, **kwargs) -> Any: ... 
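# (In recent JAX releases the same utilities are also exposed without the
#  tree_ prefix under jax.tree, e.g. jax.tree.map, jax.tree.flatten, jax.tree.leaves.)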
+def tree_flatten(tree) -> tuple[list, Any]: ... +def tree_unflatten(treedef, leaves) -> Any: ... +def tree_leaves(tree) -> list: ... +def tree_structure(tree) -> Any: ... +``` + +[Tree Operations](./tree-operations.md) + +### Device and Memory Management + +Device placement, memory management, and distributed computing primitives. Enables efficient use of accelerators and scaling across multiple devices. + +```python { .api } +def devices() -> list[Device]: ... +def device_put(x, device=None) -> Array: ... +def device_get(x) -> Any: ... +class NamedSharding: ... +def make_mesh(*mesh_axes, axis_names=None) -> Mesh: ... +def shard_map(f, mesh, in_specs, out_specs, **kwargs) -> Callable: ... +``` + +[Device and Memory Management](./device-memory.md) + +### Experimental Features + +Cutting-edge and experimental JAX features including new APIs, performance optimizations, and research capabilities. These features may change in future versions. + +```python { .api } +def io_callback(callback, result_shape_dtypes, *args, **kwargs) -> Any: ... +def enable_x64(enable=True) -> None: ... +class MutableArray: ... +def saved_input_vjp(f, *primals) -> tuple[Any, Callable]: ... +``` + +[Experimental Features](./experimental.md) + +## Core Types + +```python { .api } +class Array: + """JAX array type for numerical computing.""" + shape: tuple[int, ...] + dtype: numpy.dtype + size: int + ndim: int + + def __array__(self) -> numpy.ndarray: ... + def __getitem__(self, key) -> Array: ... + def astype(self, dtype) -> Array: ... + def reshape(self, *shape) -> Array: ... + def transpose(self, *axes) -> Array: ... + +class Device: + """Device abstraction for accelerators.""" + platform: str + device_kind: str + id: int + host_id: int + +class ShapeDtypeStruct: + """Shape and dtype structure for abstract evaluation.""" + shape: tuple[int, ...] + dtype: numpy.dtype + + def __init__(self, shape, dtype): ... + +PRNGKeyArray = Array # Type alias for PRNG keys +``` + +## Configuration and Debugging + +```python { .api } +# Configuration flags +jax.config.update('jax_enable_x64', True) # Enable 64-bit precision +jax.config.update('jax_debug_nans', True) # Debug NaN values +jax.config.update('jax_debug_infs', True) # Debug Inf values +jax.config.update('jax_platform_name', 'cpu') # Force platform +jax.config.update('jax_default_device', device) # Set default device +jax.config.update('jax_compilation_cache_dir', '/path/to/cache') # Cache directory +jax.config.update('jax_disable_jit', True) # Disable JIT globally +jax.config.update('jax_log_compiles', True) # Log compilation events + +# Core utilities and debugging +def typeof(x) -> Any: ... +def live_arrays() -> list[Array]: ... +def clear_caches() -> None: ... +def make_jaxpr(fun) -> Callable: ... +def eval_shape(fun, *args, **kwargs) -> Any: ... +def print_environment_info() -> None: ... +def ensure_compile_time_eval() -> None: ... +def pure_callback(callback, result_shape_dtypes, *args, **kwargs) -> Any: ... +def effects_barrier() -> None: ... +def named_call(f, *, name: str) -> Callable: ... +def named_scope(name: str): ... +def disable_jit(disable: bool = True): ... + +# Memory and performance utilities +def device_count_per_host() -> int: ... +def host_callback(callback, result_shape, *args, **kwargs) -> Any: ... +def make_mesh(*mesh_axes, axis_names=None) -> Any: ... +def with_sharding_constraint(x, constraint) -> Array: ... + +# Advanced debugging +def debug_print(fmt: str, *args) -> None: ... +def debug_callback(callback, *args) -> None: ... 
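# (debug_print and debug_callback correspond to the jax.debug module,
#  i.e. jax.debug.print(fmt, *args) and jax.debug.callback(callback, *args).)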
+def debug_key_reuse(enable: bool = True) -> None: ... +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-jax/docs/low-level-ops.md b/.tessl/tiles/tessl/pypi-jax/docs/low-level-ops.md new file mode 100644 index 0000000..a5bdfc6 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-jax/docs/low-level-ops.md @@ -0,0 +1,939 @@ +# Low-Level Operations + +JAX LAX provides direct XLA operations and primitives for high-performance computing. These low-level functions offer precise control over computation and serve as building blocks for higher-level JAX operations. + +## Core Imports + +```python +import jax.lax as lax +from jax.lax import add, mul, dot_general, cond, scan +``` + +## Capabilities + +### Arithmetic Operations + +Element-wise arithmetic operations that map directly to XLA primitives. + +```python { .api } +def add(x, y) -> Array: + """Element-wise addition.""" + +def sub(x, y) -> Array: + """Element-wise subtraction.""" + +def mul(x, y) -> Array: + """Element-wise multiplication.""" + +def div(x, y) -> Array: + """Element-wise division.""" + +def rem(x, y) -> Array: + """Element-wise remainder.""" + +def max(x, y) -> Array: + """Element-wise maximum.""" + +def min(x, y) -> Array: + """Element-wise minimum.""" + +def abs(x) -> Array: + """Element-wise absolute value.""" + +def neg(x) -> Array: + """Element-wise negation.""" + +def sign(x) -> Array: + """Element-wise sign function.""" + +def pow(x, y) -> Array: + """Element-wise power operation.""" + +def integer_pow(x, y) -> Array: + """Element-wise integer power.""" + +def reciprocal(x) -> Array: + """Element-wise reciprocal (1/x).""" + +def square(x) -> Array: + """Element-wise square.""" + +def sqrt(x) -> Array: + """Element-wise square root.""" + +def rsqrt(x) -> Array: + """Element-wise reciprocal square root (1/√x).""" + +def cbrt(x) -> Array: + """Element-wise cube root.""" + +def clamp(min, x, max) -> Array: + """ + Clamp values between minimum and maximum. + + Args: + min: Minimum value + x: Input array + max: Maximum value + + Returns: + Array with values clamped to [min, max] + """ +``` + +### Mathematical Functions + +Transcendental and special mathematical functions. + +```python { .api } +# Trigonometric functions +def sin(x) -> Array: ... +def cos(x) -> Array: ... +def tan(x) -> Array: ... +def asin(x) -> Array: ... +def acos(x) -> Array: ... +def atan(x) -> Array: ... +def atan2(x, y) -> Array: ... + +# Hyperbolic functions +def sinh(x) -> Array: ... +def cosh(x) -> Array: ... +def tanh(x) -> Array: ... +def asinh(x) -> Array: ... +def acosh(x) -> Array: ... +def atanh(x) -> Array: ... + +# Exponential and logarithmic +def exp(x) -> Array: ... +def exp2(x) -> Array: ... +def expm1(x) -> Array: ... +def log(x) -> Array: ... +def log1p(x) -> Array: ... +def logistic(x) -> Array: ... + +# Rounding operations +def ceil(x) -> Array: ... +def floor(x) -> Array: ... +def round(x) -> Array: ... + +# Complex number operations +def complex(real, imag) -> Array: + """Create complex array from real and imaginary parts.""" + +def conj(x) -> Array: + """Complex conjugate.""" + +def real(x) -> Array: + """Extract real part of complex array.""" + +def imag(x) -> Array: + """Extract imaginary part of complex array.""" +``` + +### Comparison Operations + +Element-wise comparison operations returning boolean arrays. 
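Unlike their `jnp` counterparts, these primitives perform no implicit dtype promotion (and most `lax` primitives also do no implicit rank broadcasting), so operands must be matched explicitly. A small sketch of the difference:

```python
import jax.numpy as jnp
import jax.lax as lax

x = jnp.arange(4, dtype=jnp.float32)
y = jnp.arange(4, dtype=jnp.int32)

# jnp follows NumPy-style promotion rules:
print(jnp.less(y, x))                    # mixed int32/float32 is fine

# lax requires matching dtypes, so cast explicitly before comparing:
print(lax.lt(y.astype(jnp.float32), x))

# lax.lt(y, x) would raise a dtype-mismatch error, because lax
# primitives do not promote types implicitly.
```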
+ +```python { .api } +def eq(x, y) -> Array: + """Element-wise equality.""" + +def ne(x, y) -> Array: + """Element-wise inequality.""" + +def lt(x, y) -> Array: + """Element-wise less than.""" + +def le(x, y) -> Array: + """Element-wise less than or equal.""" + +def gt(x, y) -> Array: + """Element-wise greater than.""" + +def ge(x, y) -> Array: + """Element-wise greater than or equal.""" + +def is_finite(x) -> Array: + """Element-wise finite number test.""" +``` + +### Bitwise Operations + +Bitwise operations on integer arrays. + +```python { .api } +# Bitwise operations +def bitwise_and(x, y) -> Array: ... +def bitwise_or(x, y) -> Array: ... +def bitwise_xor(x, y) -> Array: ... +def bitwise_not(x) -> Array: ... + +# Bit shifting +def shift_left(x, y) -> Array: ... +def shift_right_logical(x, y) -> Array: ... +def shift_right_arithmetic(x, y) -> Array: ... + +# Bit manipulation +def clz(x) -> Array: + """Count leading zeros.""" + +def population_count(x) -> Array: + """Count set bits.""" +``` + +### Array Operations + +Shape manipulation, broadcasting, and array transformation operations. + +```python { .api } +def broadcast(operand, sizes) -> Array: + """Broadcast array by adding dimensions.""" + +def broadcast_in_dim(operand, shape, broadcast_dimensions) -> Array: + """Broadcast array into target shape.""" + +def reshape(operand, new_sizes, dimensions=None) -> Array: + """Reshape array to new dimensions.""" + +def transpose(operand, permutation) -> Array: + """Transpose array axes.""" + +def rev(operand, dimensions) -> Array: + """Reverse array along specified dimensions.""" + +def concatenate(operands, dimension) -> Array: + """Concatenate arrays along dimension.""" + +def pad(operand, padding_value, padding_config) -> Array: + """Pad array with constant value.""" + +def squeeze(array, dimensions) -> Array: + """Remove unit dimensions.""" + +def expand_dims(array, dimensions) -> Array: + """Add unit dimensions.""" +``` + +### Indexing and Slicing + +Advanced indexing operations for array access and updates. + +```python { .api } +def slice(operand, start_indices, limit_indices, strides=None) -> Array: + """Extract slice from array.""" + +def slice_in_dim(operand, start, limit, stride=1, axis=0) -> Array: + """Slice array along single dimension.""" + +def dynamic_slice(operand, start_indices, slice_sizes) -> Array: + """Extract slice with dynamic start indices.""" + +def dynamic_slice_in_dim(operand, start, size, axis=0) -> Array: + """Dynamic slice along single dimension.""" + +def dynamic_update_slice(operand, update, start_indices) -> Array: + """Update slice with dynamic start indices.""" + +def dynamic_update_slice_in_dim(operand, update, start, axis) -> Array: + """Dynamic update slice along single dimension.""" + +def gather( + operand, + start_indices, + dimension_numbers, + slice_sizes, + indices_are_sorted=False, + unique_indices=False, + mode=None, + fill_value=None +) -> Array: + """General gather operation for advanced indexing.""" + +def scatter( + operand, + scatter_indices, + updates, + dimension_numbers, + indices_are_sorted=False, + unique_indices=False, + mode=None +) -> Array: + """General scatter operation for advanced updates.""" + +# Scatter variants for different operations +def scatter_add(operand, scatter_indices, updates, dimension_numbers, **kwargs) -> Array: ... +def scatter_sub(operand, scatter_indices, updates, dimension_numbers, **kwargs) -> Array: ... +def scatter_mul(operand, scatter_indices, updates, dimension_numbers, **kwargs) -> Array: ... 
+def scatter_max(operand, scatter_indices, updates, dimension_numbers, **kwargs) -> Array: ... +def scatter_min(operand, scatter_indices, updates, dimension_numbers, **kwargs) -> Array: ... + +def index_in_dim(operand, index, axis=0, keepdims=True) -> Array: + """Index array along single dimension.""" + +def index_take(src, idxs, axes) -> Array: + """Take elements using multi-dimensional indices.""" +``` + +### Reduction Operations + +Reduce arrays along specified axes using various operations. + +```python { .api } +def reduce( + operand, + init_value, + computation, + dimensions +) -> Array: + """ + General reduction operation. + + Args: + operand: Array to reduce + init_value: Initial value for reduction + computation: Binary function for reduction + dimensions: Axes to reduce over + + Returns: + Reduced array + """ + +# Specialized reductions +def reduce_sum(operand, axes) -> Array: ... +def reduce_prod(operand, axes) -> Array: ... +def reduce_max(operand, axes) -> Array: ... +def reduce_min(operand, axes) -> Array: ... +def reduce_and(operand, axes) -> Array: ... +def reduce_or(operand, axes) -> Array: ... +def reduce_xor(operand, axes) -> Array: ... + +# Windowed reductions +def reduce_window( + operand, + init_value, + computation, + window_dimensions, + window_strides=None, + padding=None, + base_dilation=None, + window_dilation=None +) -> Array: + """ + Sliding window reduction. + + Args: + operand: Input array + init_value: Initial value for reduction + computation: Binary reduction function + window_dimensions: Size of sliding window + window_strides: Stride of sliding window + padding: Padding specification + base_dilation: Base dilation factor + window_dilation: Window dilation factor + + Returns: + Reduced array with window operation applied + """ +``` + +### Control Flow + +Conditional execution and loop constructs for dynamic computation graphs. + +```python { .api } +def cond(pred, true_fun, false_fun, *operands) -> Any: + """ + Conditional execution based on predicate. + + Args: + pred: Boolean scalar predicate + true_fun: Function to execute if pred is True + false_fun: Function to execute if pred is False + operands: Arguments to pass to selected function + + Returns: + Result of executing selected function + """ + +def select(pred, on_true, on_false) -> Array: + """Element-wise conditional selection.""" + +def select_n(which, *cases) -> Array: + """Multi-way conditional selection.""" + +def while_loop(cond_fun, body_fun, init_val) -> Any: + """ + While loop with condition and body functions. + + Args: + cond_fun: Function that returns boolean condition + body_fun: Function that updates loop state + init_val: Initial loop state + + Returns: + Final loop state after termination + """ + +def fori_loop(lower, upper, body_fun, init_val) -> Any: + """ + For loop over range with body function. + + Args: + lower: Loop start index + upper: Loop end index (exclusive) + body_fun: Function that updates state (takes index and state) + init_val: Initial loop state + + Returns: + Final loop state + """ + +def scan(f, init, xs, length=None, reverse=False, unroll=1) -> tuple[Any, Array]: + """ + Scan operation applying function over sequence. 
+ + Args: + f: Function to apply (takes carry and input, returns new carry and output) + init: Initial carry value + xs: Input sequence + length: Length of sequence (inferred if None) + reverse: Whether to scan in reverse + unroll: Number of iterations to unroll + + Returns: + Tuple of (final_carry, outputs) + """ + +def associative_scan(fn, elems, reverse=False, axis=0) -> Array: + """ + Parallel associative scan operation. + + Args: + fn: Associative binary function + elems: Input sequence + reverse: Whether to scan in reverse + axis: Axis to scan along + + Returns: + Scanned results + """ + +def switch(index, branches, *operands) -> Any: + """ + Switch statement for multi-way branching. + + Args: + index: Integer index selecting branch + branches: List of functions (branches) + operands: Arguments to pass to selected branch + + Returns: + Result of executing selected branch + """ + +def map(f, xs) -> Array: + """Map function over leading axis of array.""" +``` + +### Cumulative Operations + +Cumulative operations along array axes. + +```python { .api } +def cumsum(operand, axis=None, reverse=False) -> Array: + """Cumulative sum along axis.""" + +def cumprod(operand, axis=None, reverse=False) -> Array: + """Cumulative product along axis.""" + +def cummax(operand, axis=None, reverse=False) -> Array: + """Cumulative maximum along axis.""" + +def cummin(operand, axis=None, reverse=False) -> Array: + """Cumulative minimum along axis.""" + +def cumlogsumexp(operand, axis=None, reverse=False) -> Array: + """Cumulative log-sum-exp along axis.""" +``` + +### Linear Algebra + +Matrix operations and linear algebra primitives. + +```python { .api } +def dot(lhs, rhs, precision=None, preferred_element_type=None) -> Array: + """Matrix multiplication for 1D and 2D arrays.""" + +def dot_general( + lhs, + rhs, + dimension_numbers, + precision=None, + preferred_element_type=None +) -> Array: + """ + General matrix multiplication with custom contractions. + + Args: + lhs: Left-hand side array + rhs: Right-hand side array + dimension_numbers: Specification of contraction and batch dimensions + precision: Computation precision + preferred_element_type: Preferred output element type + + Returns: + Result of general matrix multiplication + """ + +def batch_matmul( + lhs, + rhs, + precision=None, + preferred_element_type=None +) -> Array: + """Batched matrix multiplication.""" + +class DotDimensionNumbers: + """Dimension specification for dot_general operation.""" + lhs_contracting_dimensions: tuple[int, ...] + rhs_contracting_dimensions: tuple[int, ...] + lhs_batch_dimensions: tuple[int, ...] + rhs_batch_dimensions: tuple[int, ...] +``` + +### Advanced Linear Algebra (lax.linalg) + +Advanced linear algebra operations from `jax.lax.linalg`. + +```python { .api } +def cholesky(a, *, symmetrize_input: bool = True) -> Array: + """ + Cholesky decomposition of positive definite matrix. + + Args: + a: Positive definite matrix + symmetrize_input: Whether to symmetrize input + + Returns: + Lower triangular Cholesky factor + """ + +def cholesky_update(r, u, *, alpha: float = 1.0) -> Array: + """ + Rank-1 update to Cholesky factorization. + + Args: + r: Cholesky factor + u: Update vector + alpha: Update coefficient + + Returns: + Updated Cholesky factor + """ + +def eig(a, *, compute_left_eigenvectors: bool = True, compute_right_eigenvectors: bool = True) -> tuple[Array, Array, Array]: + """ + Eigenvalue decomposition of general matrix. 
+ + Args: + a: Input matrix + compute_left_eigenvectors: Whether to compute left eigenvectors + compute_right_eigenvectors: Whether to compute right eigenvectors + + Returns: + Tuple of (eigenvalues, left_eigenvectors, right_eigenvectors) + """ + +def eigh(a, *, lower: bool = True, symmetrize_input: bool = True, sort_eigenvalues: bool = True) -> tuple[Array, Array]: + """ + Eigenvalue decomposition of Hermitian matrix. + + Args: + a: Hermitian matrix + lower: Whether to use lower triangle + symmetrize_input: Whether to symmetrize input + sort_eigenvalues: Whether to sort eigenvalues + + Returns: + Tuple of (eigenvalues, eigenvectors) + """ + +def lu(a) -> tuple[Array, Array, Array]: + """ + LU decomposition with partial pivoting. + + Args: + a: Input matrix + + Returns: + Tuple of (lu_factors, pivots, permutation) + """ + +def qr(a, *, full_matrices: bool = True) -> tuple[Array, Array]: + """ + QR decomposition. + + Args: + a: Input matrix + full_matrices: Whether to return full or reduced QR + + Returns: + Tuple of (q, r) matrices + """ + +def svd(a, *, full_matrices: bool = True, compute_uv: bool = True, hermitian: bool = False) -> tuple[Array, Array, Array]: + """ + Singular value decomposition. + + Args: + a: Input matrix + full_matrices: Whether to return full or reduced SVD + compute_uv: Whether to compute U and V matrices + hermitian: Whether matrix is Hermitian + + Returns: + Tuple of (u, s, vh) where A = U @ diag(s) @ Vh + """ + +def schur(a, *, compute_schur_vectors: bool = True, sort_eigs: bool = False, select_callable=None) -> tuple[Array, Array]: + """ + Schur decomposition. + + Args: + a: Input matrix + compute_schur_vectors: Whether to compute Schur vectors + sort_eigs: Whether to sort eigenvalues + select_callable: Selection function for eigenvalues + + Returns: + Tuple of (schur_form, schur_vectors) + """ + +def hessenberg(a) -> tuple[Array, Array]: + """ + Hessenberg decomposition. + + Args: + a: Input matrix + + Returns: + Tuple of (hessenberg_form, orthogonal_matrix) + """ + +def triangular_solve(a, b, *, left_side: bool = True, lower: bool = True, transpose_a: bool = False, conjugate_a: bool = False, unit_diagonal: bool = False) -> Array: + """ + Solve triangular system of equations. + + Args: + a: Triangular matrix + b: Right-hand side + left_side: Whether A is on left side (Ax = b) or right (xA = b) + lower: Whether A is lower triangular + transpose_a: Whether to transpose A + conjugate_a: Whether to conjugate A + unit_diagonal: Whether A has unit diagonal + + Returns: + Solution to triangular system + """ + +def tridiagonal(a, d, *, lower: bool = True) -> tuple[Array, Array]: + """ + Tridiagonal reduction of symmetric matrix. + + Args: + a: Symmetric matrix + d: Diagonal elements + lower: Whether to use lower triangle + + Returns: + Tuple of (tridiagonal_matrix, orthogonal_matrix) + """ + +def tridiagonal_solve(dl, d, du, b) -> Array: + """ + Solve tridiagonal system using Thomas algorithm. + + Args: + dl: Lower diagonal + d: Main diagonal + du: Upper diagonal + b: Right-hand side + + Returns: + Solution to tridiagonal system + """ + +def qdwh(a, *, is_hermitian: bool = False, max_iterations: int = None, dynamic_shape: bool = False) -> tuple[Array, Array]: + """ + QDWH polar decomposition: A = UP where U is unitary, P is positive semidefinite. 
+ + Args: + a: Input matrix + is_hermitian: Whether matrix is Hermitian + max_iterations: Maximum number of iterations + dynamic_shape: Whether to handle dynamic shapes + + Returns: + Tuple of (unitary_factor, positive_factor) + """ + +def householder_product(a, taus) -> Array: + """ + Compute product of Householder reflectors. + + Args: + a: Matrix containing Householder vectors + taus: Householder scaling factors + + Returns: + Product of Householder reflectors + """ + +def lu_pivots_to_permutation(pivots, permutation_size) -> Array: + """ + Convert LU pivots to permutation matrix. + + Args: + pivots: Pivot indices from LU decomposition + permutation_size: Size of permutation matrix + + Returns: + Permutation matrix + """ +``` + +### Convolution Operations + +Convolution operations for neural networks and signal processing. + +```python { .api } +def conv( + lhs, + rhs, + window_strides, + padding, + precision=None, + preferred_element_type=None +) -> Array: + """Basic convolution operation.""" + +def conv_general_dilated( + lhs, + rhs, + window_strides, + padding, + lhs_dilation=None, + rhs_dilation=None, + dimension_numbers=None, + feature_group_count=1, + batch_group_count=1, + precision=None, + preferred_element_type=None +) -> Array: + """ + General dilated convolution with full configuration options. + + Args: + lhs: Input array (N...HWC or NCHW... format) + rhs: Kernel array + window_strides: Convolution strides + padding: Padding specification + lhs_dilation: Input dilation + rhs_dilation: Kernel dilation (atrous convolution) + dimension_numbers: Dimension layout specification + feature_group_count: Number of feature groups + batch_group_count: Number of batch groups + precision: Computation precision + preferred_element_type: Preferred output type + + Returns: + Convolution result + """ + +def conv_transpose( + lhs, + rhs, + strides, + padding, + rhs_dilation=None, + dimension_numbers=None, + transpose_kernel=False, + precision=None, + preferred_element_type=None +) -> Array: + """Transposed (deconvolution) operation.""" + +class ConvDimensionNumbers: + """Convolution dimension number specification.""" + lhs_spec: tuple[int, ...] # Input dimension specification + rhs_spec: tuple[int, ...] # Kernel dimension specification + out_spec: tuple[int, ...] # Output dimension specification +``` + +### FFT Operations + +Fast Fourier Transform operations. + +```python { .api } +def fft(a, fft_type, fft_lengths) -> Array: + """ + Fast Fourier Transform. + + Args: + a: Input array + fft_type: Type of FFT (from FftType enum) + fft_lengths: Lengths of FFT dimensions + + Returns: + FFT result + """ + +class FftType: + """FFT type enumeration.""" + FFT = "FFT" + IFFT = "IFFT" + RFFT = "RFFT" + IRFFT = "IRFFT" +``` + +### Parallel Operations + +Multi-device communication primitives for distributed computing. 
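These collectives are only meaningful inside a transformation that introduces a named axis (such as `pmap` or `shard_map`). A minimal data-parallel sketch using `psum`, sized to however many local devices are present:

```python
from functools import partial
import jax
import jax.numpy as jnp

n = jax.local_device_count()

@partial(jax.pmap, axis_name='i')
def normalized(x):
    # Sum the per-device shards across the named axis, then use it locally.
    total = jax.lax.psum(x, axis_name='i')
    return x / total

# One shard per device along the leading axis.
shards = jnp.arange(1.0, n + 1.0).reshape(n, 1)
print(normalized(shards))   # each shard divided by the global sum
```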
+ +```python { .api } +def all_gather(x, axis_name, *, axis_index_groups=None, tiled=False) -> Array: + """Gather values from all devices.""" + +def all_to_all(x, axis_name, split_axis, concat_axis, *, axis_index_groups=None, tiled=False) -> Array: + """All-to-all communication between devices.""" + +def psum(x, axis_name, *, axis_index_groups=None) -> Array: + """Parallel sum reduction across devices.""" + +def pmean(x, axis_name, *, axis_index_groups=None) -> Array: + """Parallel mean reduction across devices.""" + +def pmax(x, axis_name, *, axis_index_groups=None) -> Array: + """Parallel max reduction across devices.""" + +def pmin(x, axis_name, *, axis_index_groups=None) -> Array: + """Parallel min reduction across devices.""" + +def ppermute(x, axis_name, perm, *, axis_index_groups=None) -> Array: + """Permute data between devices.""" + +def axis_index(axis_name) -> Array: + """Get device index along named axis.""" + +def axis_size(axis_name) -> int: + """Get number of devices along named axis.""" + +def pbroadcast(x, axis_name, *, axis_index_groups=None) -> Array: + """Broadcast from first device to all others.""" +``` + +### Special Functions + +Special mathematical functions and probability distributions. + +```python { .api } +# Error functions +def erf(x) -> Array: ... +def erfc(x) -> Array: ... +def erf_inv(x) -> Array: ... + +# Gamma functions +def lgamma(x) -> Array: ... +def digamma(x) -> Array: ... +def polygamma(m, x) -> Array: ... + +# Bessel functions +def bessel_i0e(x) -> Array: ... +def bessel_i1e(x) -> Array: ... + +# Other special functions +def betainc(a, b, x) -> Array: ... +def igamma(a, x) -> Array: ... +def igammac(a, x) -> Array: ... +def zeta(x, q=None) -> Array: ... +``` + +### Type Conversion and Manipulation + +Array type conversion and data manipulation operations. + +```python { .api } +def convert_element_type(operand, new_dtype) -> Array: + """Convert array element type.""" + +def bitcast_convert_type(operand, new_dtype) -> Array: + """Bitcast array to new type without changing bit representation.""" + +def dtype(x) -> numpy.dtype: + """Get array data type.""" + +def full(shape, fill_value, dtype=None) -> Array: + """Create array filled with constant value.""" + +def full_like(x, fill_value, dtype=None, shape=None) -> Array: + """Create filled array with same properties as input.""" + +def iota(dtype, size) -> Array: + """Create array with sequential values (0, 1, 2, ...).""" + +def broadcasted_iota(dtype, shape, dimension) -> Array: + """Create iota array broadcasted to shape.""" +``` + +### Sorting Operations + +Sorting and selection operations. + +```python { .api } +def sort(operand, dimension=-1, is_stable=True) -> Array: + """Sort array along dimension.""" + +def sort_key_val(keys, values, dimension=-1, is_stable=True) -> tuple[Array, Array]: + """Sort key-value pairs.""" + +def top_k(operand, k) -> tuple[Array, Array]: + """Find top k largest elements and their indices.""" + +def argmax(operand, axis=None, index_dtype=int) -> Array: + """Indices of maximum values.""" + +def argmin(operand, axis=None, index_dtype=int) -> Array: + """Indices of minimum values.""" +``` + +### Miscellaneous Operations + +Additional utility operations and performance primitives. 
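The most commonly used of these is `stop_gradient`, which blocks derivatives from flowing through part of a computation (useful for target networks, straight-through estimators, and similar patterns). A small sketch:

```python
import jax
import jax.numpy as jnp
from jax import lax

def loss(x):
    target = lax.stop_gradient(2.0 * x)   # treated as a constant by grad
    return jnp.sum((x - target) ** 2)

x = jnp.array([1.0, 2.0, 3.0])
# d/dx (x - c)^2 with c = 2x held fixed is 2*(x - 2x) = -2x
print(jax.grad(loss)(x))   # [-2. -4. -6.]
```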
+ +```python { .api } +def stop_gradient(x) -> Array: + """Stop gradient computation at this point.""" + +def optimization_barrier(x) -> Array: + """Prevent optimization across this point.""" + +def nextafter(x1, x2) -> Array: + """Next representable value after x1 in direction of x2.""" + +def reduce_precision(operand, exponent_bits, mantissa_bits) -> Array: + """Reduce floating-point precision.""" + +def create_token() -> Array: + """Create execution token for ordering side effects.""" + +def after_all(*tokens) -> Array: + """Create token that depends on all input tokens.""" + +# Random number generation primitives +def rng_uniform(a, b, shape, dtype=None) -> Array: + """Low-level uniform random number generation.""" + +def rng_bit_generator(key, shape, dtype=None, algorithm=None) -> tuple[Array, Array]: + """Low-level random bit generation.""" +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-jax/docs/neural-networks.md b/.tessl/tiles/tessl/pypi-jax/docs/neural-networks.md new file mode 100644 index 0000000..254291a --- /dev/null +++ b/.tessl/tiles/tessl/pypi-jax/docs/neural-networks.md @@ -0,0 +1,573 @@ +# Neural Network Functions + +JAX provides a comprehensive set of neural network functions through `jax.nn` including activation functions, normalization utilities, and attention mechanisms commonly used in machine learning and deep learning applications. + +## Core Imports + +```python +import jax.nn as jnn +from jax.nn import relu, sigmoid, softmax, gelu +``` + +## Capabilities + +### ReLU and Variants + +Rectified Linear Unit activations and their variants for introducing non-linearity while maintaining computational efficiency. + +```python { .api } +def relu(x) -> Array: + """ + Rectified Linear Unit activation: max(0, x). + + Args: + x: Input array + + Returns: + Array with ReLU applied element-wise + """ + +def relu6(x) -> Array: + """ + ReLU capped at 6: min(max(0, x), 6). + + Args: + x: Input array + + Returns: + Array with ReLU6 applied element-wise + """ + +def leaky_relu(x, negative_slope=0.01) -> Array: + """ + Leaky ReLU: max(negative_slope * x, x). + + Args: + x: Input array + negative_slope: Slope for negative values (default: 0.01) + + Returns: + Array with Leaky ReLU applied element-wise + """ + +def elu(x, alpha=1.0) -> Array: + """ + Exponential Linear Unit: x if x > 0 else alpha * (exp(x) - 1). + + Args: + x: Input array + alpha: Scale for negative values (default: 1.0) + + Returns: + Array with ELU applied element-wise + """ + +def selu(x) -> Array: + """ + Scaled Exponential Linear Unit with fixed alpha and scale. + + Args: + x: Input array + + Returns: + Array with SELU applied element-wise + """ + +def celu(x, alpha=1.0) -> Array: + """ + Continuously Differentiable Exponential Linear Unit. + + Args: + x: Input array + alpha: Scale parameter (default: 1.0) + + Returns: + Array with CELU applied element-wise + """ +``` + +### Modern Activations + +Contemporary activation functions that have shown improved performance in various architectures. + +```python { .api } +def gelu(x, approximate=True) -> Array: + """ + Gaussian Error Linear Unit: x * Φ(x) where Φ is CDF of standard normal. + + Args: + x: Input array + approximate: Whether to use tanh approximation (default: True) + + Returns: + Array with GELU applied element-wise + """ + +def silu(x) -> Array: + """ + Sigmoid Linear Unit (Swish): x * sigmoid(x). 
+ + Args: + x: Input array + + Returns: + Array with SiLU applied element-wise + """ + +def swish(x) -> Array: + """ + Swish activation (alias for SiLU): x * sigmoid(x). + + Args: + x: Input array + + Returns: + Array with Swish applied element-wise + """ + +def mish(x) -> Array: + """ + Mish activation: x * tanh(softplus(x)). + + Args: + x: Input array + + Returns: + Array with Mish applied element-wise + """ + +def hard_silu(x) -> Array: + """ + Hard SiLU (Hard Swish variant): x * hard_sigmoid(x). + + Args: + x: Input array + + Returns: + Array with Hard SiLU applied element-wise + """ + +def hard_swish(x) -> Array: + """ + Hard Swish: x * relu6(x + 3) / 6. + + Args: + x: Input array + + Returns: + Array with Hard Swish applied element-wise + """ + +def squareplus(x, b=4.0) -> Array: + """ + Squareplus activation: (x + sqrt(x^2 + b)) / 2. + + Args: + x: Input array + b: Shape parameter (default: 4.0) + + Returns: + Array with Squareplus applied element-wise + """ +``` + +### Sigmoid and Tanh Variants + +Sigmoid-based activations and their approximations for bounded outputs. + +```python { .api } +def sigmoid(x) -> Array: + """ + Sigmoid activation: 1 / (1 + exp(-x)). + + Args: + x: Input array + + Returns: + Array with sigmoid applied element-wise + """ + +def hard_sigmoid(x) -> Array: + """ + Hard sigmoid approximation: max(0, min(1, (x + 1) / 2)). + + Args: + x: Input array + + Returns: + Array with hard sigmoid applied element-wise + """ + +def log_sigmoid(x) -> Array: + """ + Log sigmoid: log(sigmoid(x)) computed in numerically stable way. + + Args: + x: Input array + + Returns: + Array with log sigmoid applied element-wise + """ + +def soft_sign(x) -> Array: + """ + Soft sign activation: x / (1 + |x|). + + Args: + x: Input array + + Returns: + Array with soft sign applied element-wise + """ + +def tanh(x) -> Array: + """ + Hyperbolic tangent activation. + + Args: + x: Input array + + Returns: + Array with tanh applied element-wise + """ + +def hard_tanh(x) -> Array: + """ + Hard tanh activation: max(-1, min(1, x)). + + Args: + x: Input array + + Returns: + Array with hard tanh applied element-wise + """ +``` + +### Softmax and Normalization + +Normalization functions for probability distributions and feature standardization. + +```python { .api } +def softmax(x, axis=-1, where=None, initial=None) -> Array: + """ + Softmax activation: exp(x_i) / sum(exp(x)) along axis. + + Args: + x: Input array + axis: Axis to apply softmax along (default: -1) + where: Mask for conditional computation + initial: Initial value for reduction + + Returns: + Array with softmax applied along specified axis + """ + +def log_softmax(x, axis=-1, where=None, initial=None) -> Array: + """ + Log softmax: log(softmax(x)) computed in numerically stable way. + + Args: + x: Input array + axis: Axis to apply log softmax along (default: -1) + where: Mask for conditional computation + initial: Initial value for reduction + + Returns: + Array with log softmax applied along specified axis + """ + +def softplus(x) -> Array: + """ + Softplus activation: log(1 + exp(x)). + + Args: + x: Input array + + Returns: + Array with softplus applied element-wise + """ + +def standardize(x, axis=None, mean=None, variance=None, epsilon=1e-5) -> Array: + """ + Standardize array to zero mean and unit variance. 
+ + Args: + x: Input array to standardize + axis: Axis to compute statistics along + mean: Pre-computed mean (computed if None) + variance: Pre-computed variance (computed if None) + epsilon: Small value for numerical stability + + Returns: + Standardized array + """ + +def glu(x, axis=-1) -> Array: + """ + Gated Linear Unit: split x in half along axis, return a * sigmoid(b). + + Args: + x: Input array (size along axis must be even) + axis: Axis to split along (default: -1) + + Returns: + Array with GLU applied + """ +``` + +### Specialized Functions + +Utility functions for neural network operations and transformations. + +```python { .api } +def one_hot(x, num_classes, dtype=None, axis=-1) -> Array: + """ + One-hot encode array of integers. + + Args: + x: Integer array to encode + num_classes: Number of classes + dtype: Output data type + axis: Axis to insert one-hot dimension + + Returns: + One-hot encoded array + """ + +def logsumexp(a, axis=None, b=None, keepdims=False, return_sign=False, where=None) -> Array: + """ + Compute log(sum(exp(a))) in numerically stable way. + + Args: + a: Input array + axis: Axis to sum along + b: Scaling factor array + keepdims: Whether to keep reduced dimensions + return_sign: Whether to return sign separately + where: Mask for conditional computation + + Returns: + Log-sum-exp result + """ + +def logmeanexp(a, axis=None, b=None, keepdims=False, where=None) -> Array: + """ + Compute log(mean(exp(a))) in numerically stable way. + + Args: + a: Input array + axis: Axis to average along + b: Scaling factor array + keepdims: Whether to keep reduced dimensions + where: Mask for conditional computation + + Returns: + Log-mean-exp result + """ + +def log1mexp(x) -> Array: + """ + Compute log(1 - exp(x)) in numerically stable way. + + Args: + x: Input array (should be <= 0) + + Returns: + Array with log(1 - exp(x)) applied element-wise + """ + +def sparse_plus(x, y) -> Array: + """ + Sparse-aware addition that handles missing values. + + Args: + x: First input array + y: Second input array + + Returns: + Element-wise addition result + """ + +def sparse_sigmoid(x) -> Array: + """ + Sparse-aware sigmoid activation. + + Args: + x: Input array + + Returns: + Sigmoid activation with sparse support + """ +``` + +### Attention Mechanisms + +Attention functions for transformer and neural attention models. + +```python { .api } +def dot_product_attention( + query, + key, + value, + bias=None, + mask=None, + broadcast_dropout=True, + dropout_rng=None, + dropout_rate=0.0, + deterministic=False, + dtype=None, + precision=None +) -> Array: + """ + Dot-product attention mechanism. + + Args: + query: Query array (..., length_q, depth_q) + key: Key array (..., length_kv, depth_q) + value: Value array (..., length_kv, depth_v) + bias: Optional attention bias + mask: Optional attention mask + broadcast_dropout: Whether to broadcast dropout + dropout_rng: Random key for dropout + dropout_rate: Dropout probability + deterministic: Whether to use deterministic mode + dtype: Output data type + precision: Computation precision + + Returns: + Attention output array (..., length_q, depth_v) + """ + +def scaled_dot_general( + lhs, + rhs, + dimension_numbers, + alpha=1.0, + precision=None, + preferred_element_type=None +) -> Array: + """ + Scaled general dot product for attention computations. 
+ + Args: + lhs: Left-hand side array + rhs: Right-hand side array + dimension_numbers: Contraction specification + alpha: Scaling factor + precision: Computation precision + preferred_element_type: Preferred output type + + Returns: + Scaled dot product result + """ + +def scaled_matmul( + a, + b, + alpha=1.0, + precision=None, + preferred_element_type=None +) -> Array: + """ + Scaled matrix multiplication: alpha * (a @ b). + + Args: + a: First matrix + b: Second matrix + alpha: Scaling factor + precision: Computation precision + preferred_element_type: Preferred output type + + Returns: + Scaled matrix multiplication result + """ + +def get_scaled_dot_general_config() -> dict: + """ + Get configuration for scaled dot product attention. + + Returns: + Configuration dictionary for attention operations + """ +``` + +### Utility Functions + +Additional utilities for neural network operations. + +```python { .api } +def identity(x) -> Array: + """ + Identity function that returns input unchanged. + + Args: + x: Input array + + Returns: + Input array unchanged + """ +``` + +## Neural Network Initializers + +JAX provides weight initialization functions through `jax.nn.initializers`: + +```python { .api } +import jax.nn.initializers as init + +# Standard initializers +init.zeros(key, shape, dtype=jnp.float32) -> Array +init.ones(key, shape, dtype=jnp.float32) -> Array +init.constant(value, dtype=jnp.float32) -> Callable + +# Random initializers +init.uniform(scale=1e-2, dtype=jnp.float32) -> Callable +init.normal(stddev=1e-2, dtype=jnp.float32) -> Callable +init.truncated_normal(stddev=1e-2, dtype=jnp.float32) -> Callable + +# Variance scaling initializers +init.variance_scaling(scale, mode, distribution, dtype=jnp.float32) -> Callable +init.glorot_uniform(dtype=jnp.float32) -> Callable +init.glorot_normal(dtype=jnp.float32) -> Callable +init.lecun_uniform(dtype=jnp.float32) -> Callable +init.lecun_normal(dtype=jnp.float32) -> Callable +init.he_uniform(dtype=jnp.float32) -> Callable +init.he_normal(dtype=jnp.float32) -> Callable + +# Orthogonal initializer +init.orthogonal(scale=1.0, column_axis=-1, dtype=jnp.float32) -> Callable + +# Delta orthogonal initializer (for RNNs) +init.delta_orthogonal(scale=1.0, column_axis=-1, dtype=jnp.float32) -> Callable +``` + +Usage examples: + +```python +import jax +import jax.numpy as jnp +import jax.nn as jnn +from jax.nn import initializers as init + +# Initialize weights +key = jax.random.key(42) +weights = init.glorot_uniform()(key, (784, 128)) +biases = init.zeros(key, (128,)) + +# Apply activations in a simple neural network layer +def dense_layer(x, weights, biases): + return jnn.relu(x @ weights + biases) + +# Multi-layer example with different activations +def mlp(x, params): + x = jnn.relu(x @ params['w1'] + params['b1']) + x = jnn.gelu(x @ params['w2'] + params['b2']) + x = jnn.softmax(x @ params['w3'] + params['b3']) + return x + +# Attention example +def simple_attention(q, k, v): + # Scaled dot-product attention + scores = jnn.dot_product_attention(q, k, v) + return scores +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-jax/docs/numpy-compatibility.md b/.tessl/tiles/tessl/pypi-jax/docs/numpy-compatibility.md new file mode 100644 index 0000000..a8cf74c --- /dev/null +++ b/.tessl/tiles/tessl/pypi-jax/docs/numpy-compatibility.md @@ -0,0 +1,566 @@ +# NumPy Compatibility API + +JAX provides a comprehensive NumPy-compatible API through `jax.numpy` (commonly imported as `jnp`). 
JAX arrays are immutable and support the full NumPy API with added benefits of JIT compilation, automatic differentiation, and device acceleration. + +## Core Imports + +```python +import jax.numpy as jnp +import jax +``` + +## Capabilities + +### Array Creation + +Create JAX arrays from various data sources and specifications. + +```python { .api } +def array(object, dtype=None, copy=None, order=None, ndmin=0) -> Array: + """Create array from array-like object.""" + +def asarray(a, dtype=None, order=None) -> Array: + """Convert input to array.""" + +def zeros(shape, dtype=None) -> Array: + """Create array filled with zeros.""" + +def zeros_like(a, dtype=None, shape=None) -> Array: + """Create zeros array with same shape as input.""" + +def ones(shape, dtype=None) -> Array: + """Create array filled with ones.""" + +def ones_like(a, dtype=None, shape=None) -> Array: + """Create ones array with same shape as input.""" + +def full(shape, fill_value, dtype=None) -> Array: + """Create array filled with constant value.""" + +def full_like(a, fill_value, dtype=None, shape=None) -> Array: + """Create filled array with same shape as input.""" + +def empty(shape, dtype=None) -> Array: + """Create uninitialized array.""" + +def empty_like(a, dtype=None, shape=None) -> Array: + """Create empty array with same shape as input.""" + +def eye(N, M=None, k=0, dtype=None) -> Array: + """Create identity matrix.""" + +def identity(n, dtype=None) -> Array: + """Create square identity matrix.""" + +def arange(start, stop=None, step=None, dtype=None) -> Array: + """Create evenly spaced values within interval.""" + +def linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None, axis=0) -> Array: + """Create evenly spaced numbers over interval.""" + +def logspace(start, stop, num=50, endpoint=True, base=10.0, dtype=None, axis=0) -> Array: + """Create numbers spaced evenly on log scale.""" + +def geomspace(start, stop, num=50, endpoint=True, dtype=None, axis=0) -> Array: + """Create numbers spaced evenly on log scale (geometric progression).""" + +def meshgrid(*xi, copy=True, sparse=False, indexing='xy') -> list[Array]: + """Create coordinate matrices from coordinate vectors.""" + +def mgrid() -> MGridClass: + """Multi-dimensional mesh creation.""" + +def ogrid() -> OGridClass: + """Open multi-dimensional mesh creation.""" + +def indices(dimensions, dtype=int, sparse=False) -> Array: + """Create arrays of indices.""" + +def tri(N, M=None, k=0, dtype=None) -> Array: + """Create array with ones at and below diagonal.""" +``` + +### Mathematical Functions + +Element-wise mathematical operations following NumPy conventions. + +```python { .api } +# Arithmetic operations +def add(x1, x2) -> Array: ... +def subtract(x1, x2) -> Array: ... +def multiply(x1, x2) -> Array: ... +def divide(x1, x2) -> Array: ... +def true_divide(x1, x2) -> Array: ... +def floor_divide(x1, x2) -> Array: ... +def power(x1, x2) -> Array: ... +def float_power(x1, x2) -> Array: ... +def mod(x1, x2) -> Array: ... +def remainder(x1, x2) -> Array: ... +def divmod(x1, x2) -> tuple[Array, Array]: ... + +# Trigonometric functions +def sin(x) -> Array: ... +def cos(x) -> Array: ... +def tan(x) -> Array: ... +def asin(x) -> Array: ... +def acos(x) -> Array: ... +def atan(x) -> Array: ... +def atan2(x1, x2) -> Array: ... +def sinh(x) -> Array: ... +def cosh(x) -> Array: ... +def tanh(x) -> Array: ... +def asinh(x) -> Array: ... +def acosh(x) -> Array: ... +def atanh(x) -> Array: ... +def degrees(x) -> Array: ... +def radians(x) -> Array: ... 
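+# (deg2rad and rad2deg below are aliases of radians and degrees, as in NumPy)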
+def deg2rad(x) -> Array: ... +def rad2deg(x) -> Array: ... + +# Exponential and logarithmic +def exp(x) -> Array: ... +def exp2(x) -> Array: ... +def expm1(x) -> Array: ... +def log(x) -> Array: ... +def log10(x) -> Array: ... +def log2(x) -> Array: ... +def log1p(x) -> Array: ... + +# Rounding and precision +def round(a, decimals=0) -> Array: ... +def rint(x) -> Array: ... +def fix(x) -> Array: ... +def floor(x) -> Array: ... +def ceil(x) -> Array: ... +def trunc(x) -> Array: ... + +# Arithmetic functions +def abs(x) -> Array: ... +def absolute(x) -> Array: ... +def fabs(x) -> Array: ... +def sign(x) -> Array: ... +def signbit(x) -> Array: ... +def copysign(x1, x2) -> Array: ... +def sqrt(x) -> Array: ... +def square(x) -> Array: ... +def cbrt(x) -> Array: ... +def reciprocal(x) -> Array: ... +def positive(x) -> Array: ... +def negative(x) -> Array: ... + +# Extrema functions +def maximum(x1, x2) -> Array: ... +def minimum(x1, x2) -> Array: ... +def fmax(x1, x2) -> Array: ... +def fmin(x1, x2) -> Array: ... +def clip(a, a_min=None, a_max=None) -> Array: ... + +# Complex number functions +def real(val) -> Array: ... +def imag(val) -> Array: ... +def conj(x) -> Array: ... +def conjugate(x) -> Array: ... +def angle(z, deg=False) -> Array: ... +def isreal(x) -> Array: ... +def iscomplex(x) -> Array: ... + +# Floating point functions +def isfinite(x) -> Array: ... +def isinf(x) -> Array: ... +def isnan(x) -> Array: ... +def isneginf(x) -> Array: ... +def isposinf(x) -> Array: ... +def nextafter(x1, x2) -> Array: ... +def spacing(x) -> Array: ... +def modf(x) -> tuple[Array, Array]: ... +def frexp(x) -> tuple[Array, Array]: ... +def ldexp(x1, x2) -> Array: ... +``` + +### Array Manipulation + +Functions for reshaping, combining, and transforming arrays. + +```python { .api } +# Shape manipulation +def reshape(a, newshape, order='C') -> Array: ... +def ravel(a, order='C') -> Array: ... +def flatten(a, order='C') -> Array: ... + +# Transpose operations +def transpose(a, axes=None) -> Array: ... +def swapaxes(a, axis1, axis2) -> Array: ... +def moveaxis(a, source, destination) -> Array: ... +def rollaxis(a, axis, start=0) -> Array: ... + +# Dimension manipulation +def expand_dims(a, axis) -> Array: ... +def squeeze(a, axis=None) -> Array: ... + +# Array reversal and rotation +def flip(m, axis=None) -> Array: ... +def fliplr(m) -> Array: ... +def flipud(m) -> Array: ... +def rot90(m, k=1, axes=(0, 1)) -> Array: ... +def roll(a, shift, axis=None) -> Array: ... + +# Broadcasting +def broadcast_to(array, shape) -> Array: ... +def broadcast_arrays(*args) -> list[Array]: ... + +# Joining arrays +def concatenate(arrays, axis=0) -> Array: ... +def stack(arrays, axis=0) -> Array: ... +def vstack(tup) -> Array: ... +def hstack(tup) -> Array: ... +def dstack(tup) -> Array: ... +def column_stack(tup) -> Array: ... +def append(arr, values, axis=None) -> Array: ... + +# Splitting arrays +def split(ary, indices_or_sections, axis=0) -> list[Array]: ... +def array_split(ary, indices_or_sections, axis=0) -> list[Array]: ... +def hsplit(ary, indices_or_sections) -> list[Array]: ... +def vsplit(ary, indices_or_sections) -> list[Array]: ... +def dsplit(ary, indices_or_sections) -> list[Array]: ... + +# Tiling and repeating +def tile(A, reps) -> Array: ... +def repeat(a, repeats, axis=None) -> Array: ... + +# Array modification +def insert(arr, obj, values, axis=None) -> Array: ... +def delete(arr, obj, axis=None) -> Array: ... +def place(arr, mask, vals) -> None: ... +def put(a, ind, v, mode='raise') -> None: ... 
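+# Note: JAX arrays are immutable; unlike their NumPy counterparts, these
+# "in-place" style helpers cannot modify their inputs and, where supported,
+# return an updated copy instead.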
+def put_along_axis(arr, indices, values, axis) -> None: ... + +def unique(ar, return_index=False, return_inverse=False, return_counts=False, axis=None, equal_nan=True) -> Array: ... +``` + +### Indexing and Selection + +Advanced indexing, selection, and conditional operations. + +```python { .api } +def take(a, indices, axis=None, mode=None) -> Array: + """Take elements from array along axis.""" + +def take_along_axis(arr, indices, axis) -> Array: + """Take values from array using indices along axis.""" + +def choose(a, choices, mode='raise') -> Array: + """Construct array from index array and choice arrays.""" + +def compress(condition, a, axis=None) -> Array: + """Return selected slices along axis.""" + +def extract(condition, arr) -> Array: + """Return elements satisfying condition.""" + +def select(condlist, choicelist, default=0) -> Array: + """Return elements chosen from choicelist based on conditions.""" + +def where(condition, x=None, y=None) -> Array: + """Return elements chosen from x or y based on condition.""" + +def nonzero(a) -> tuple[Array, ...]: + """Return indices of non-zero elements.""" + +def argwhere(a) -> Array: + """Return indices where condition is True.""" + +def flatnonzero(a) -> Array: + """Return indices of flattened array that are non-zero.""" + +def ix_(*args) -> tuple[Array, ...]: + """Construct open mesh from multiple sequences.""" +``` + +### Reduction Operations + +Functions that reduce arrays along axes or compute aggregates. + +```python { .api } +# Basic reductions +def sum(a, axis=None, dtype=None, keepdims=False, initial=None, where=None) -> Array: ... +def prod(a, axis=None, dtype=None, keepdims=False, initial=None, where=None) -> Array: ... +def mean(a, axis=None, dtype=None, keepdims=False, where=None) -> Array: ... +def median(a, axis=None, keepdims=False) -> Array: ... +def std(a, axis=None, dtype=None, ddof=0, keepdims=False, where=None) -> Array: ... +def var(a, axis=None, dtype=None, ddof=0, keepdims=False, where=None) -> Array: ... + +# Extrema +def min(a, axis=None, keepdims=False, initial=None, where=None) -> Array: ... +def max(a, axis=None, keepdims=False, initial=None, where=None) -> Array: ... +def amin(a, axis=None, keepdims=False, initial=None, where=None) -> Array: ... +def amax(a, axis=None, keepdims=False, initial=None, where=None) -> Array: ... +def ptp(a, axis=None, keepdims=False) -> Array: ... + +# Percentiles and quantiles +def percentile(a, q, axis=None, method='linear', keepdims=False) -> Array: ... +def quantile(a, q, axis=None, method='linear', keepdims=False) -> Array: ... + +# Cumulative operations +def cumsum(a, axis=None, dtype=None) -> Array: ... +def cumprod(a, axis=None, dtype=None) -> Array: ... + +# Logical reductions +def all(a, axis=None, keepdims=False, where=None) -> Array: ... +def any(a, axis=None, keepdims=False, where=None) -> Array: ... + +# Counting +def count_nonzero(a, axis=None, keepdims=False) -> Array: ... + +# NaN-aware reductions +def nansum(a, axis=None, dtype=None, keepdims=False, where=None) -> Array: ... +def nanprod(a, axis=None, dtype=None, keepdims=False, where=None) -> Array: ... +def nanmean(a, axis=None, dtype=None, keepdims=False, where=None) -> Array: ... +def nanmedian(a, axis=None, keepdims=False) -> Array: ... +def nanstd(a, axis=None, dtype=None, ddof=0, keepdims=False, where=None) -> Array: ... +def nanvar(a, axis=None, dtype=None, ddof=0, keepdims=False, where=None) -> Array: ... +def nanmin(a, axis=None, keepdims=False, initial=None, where=None) -> Array: ... 
+def nanmax(a, axis=None, keepdims=False, initial=None, where=None) -> Array: ... +def nanpercentile(a, q, axis=None, method='linear', keepdims=False) -> Array: ... +def nanquantile(a, q, axis=None, method='linear', keepdims=False) -> Array: ... +def nancumsum(a, axis=None, dtype=None) -> Array: ... +def nancumprod(a, axis=None, dtype=None) -> Array: ... + +# Indices of extrema +def argmin(a, axis=None, keepdims=False) -> Array: ... +def argmax(a, axis=None, keepdims=False) -> Array: ... +def nanargmin(a, axis=None, keepdims=False) -> Array: ... +def nanargmax(a, axis=None, keepdims=False) -> Array: ... +``` + +### Linear Algebra + +Core linear algebra operations for matrix computations. + +```python { .api } +# Matrix multiplication +def dot(a, b) -> Array: ... +def matmul(x1, x2) -> Array: ... +def inner(a, b) -> Array: ... +def outer(a, b) -> Array: ... +def tensordot(a, b, axes=2) -> Array: ... +def kron(a, b) -> Array: ... + +# Vector operations +def vdot(a, b) -> Array: ... +def cross(a, b, axisa=-1, axisb=-1, axisc=-1, axis=None) -> Array: ... + +# Matrix operations +def trace(a, offset=0, axis1=0, axis2=1, dtype=None) -> Array: ... +def diagonal(a, offset=0, axis1=0, axis2=1) -> Array: ... +def diag(v, k=0) -> Array: ... +def diagflat(v, k=0) -> Array: ... + +# Triangular matrices +def tril(m, k=0) -> Array: ... +def triu(m, k=0) -> Array: ... +def tril_indices(n, k=0, m=None) -> tuple[Array, Array]: ... +def triu_indices(n, k=0, m=None) -> tuple[Array, Array]: ... +def diag_indices(n, ndim=2) -> tuple[Array, ...]: ... + +# Matrix transpose +def matrix_transpose(x) -> Array: ... +``` + +### Sorting and Searching + +Functions for sorting arrays and searching for values. + +```python { .api } +def sort(a, axis=-1, kind='stable', order=None) -> Array: ... +def argsort(a, axis=-1, kind='stable', order=None) -> Array: ... +def lexsort(keys, axis=-1) -> Array: ... +def partition(a, kth, axis=-1, kind='introselect', order=None) -> Array: ... +def argpartition(a, kth, axis=-1, kind='introselect', order=None) -> Array: ... +def searchsorted(a, v, side='left', sorter=None) -> Array: ... +def sort_complex(a) -> Array: ... +``` + +### Set Operations + +Set-like operations on arrays. + +```python { .api } +def unique(ar, return_index=False, return_inverse=False, return_counts=False, axis=None) -> Array: ... +def intersect1d(ar1, ar2, assume_unique=False, return_indices=False) -> Array: ... +def union1d(ar1, ar2) -> Array: ... +def setdiff1d(ar1, ar2, assume_unique=False) -> Array: ... +def setxor1d(ar1, ar2, assume_unique=False) -> Array: ... +def isin(element, test_elements, assume_unique=False, invert=False) -> Array: ... +``` + +### Statistical Functions + +Statistical analysis and distribution functions. + +```python { .api } +def bincount(x, weights=None, minlength=0, length=None) -> Array: ... +def histogram(a, bins=10, range=None, weights=None, density=None) -> tuple[Array, Array]: ... +def histogram2d(x, y, bins=10, range=None, weights=None, density=None) -> tuple[Array, Array, Array]: ... +def histogramdd(sample, bins=10, range=None, weights=None, density=None) -> tuple[Array, list[Array]]: ... +def histogram_bin_edges(a, bins=10, range=None, weights=None) -> Array: ... +def digitize(x, bins, right=False) -> Array: ... +def average(a, axis=None, weights=None, returned=False, keepdims=False) -> Array: ... +def corrcoef(x, y=None, rowvar=True, dtype=None) -> Array: ... +def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=None, dtype=None) -> Array: ... 
+def gradient(f, *varargs, axis=None, edge_order=1) -> Array: ... +``` + +### Data Types and Conversion + +Type information, checking, and conversion functions. + +```python { .api } +# Type checking +def issubdtype(arg1, arg2) -> bool: ... +def can_cast(from_, to, casting='safe') -> bool: ... +def result_type(*arrays_and_dtypes): ... +def promote_types(type1, type2): ... +def isscalar(element) -> bool: ... +def isrealobj(x) -> bool: ... +def iscomplexobj(x) -> bool: ... + +# Type information +def finfo(dtype): ... +def iinfo(dtype): ... + +# Array properties +def ndim(a) -> int: ... +def shape(a) -> tuple: ... +def size(a) -> int: ... + +# Comparison functions +def allclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False) -> bool: ... +def isclose(a, b, rtol=1e-05, atol=1e-08, equal_nan=False) -> Array: ... +def array_equal(a1, a2, equal_nan=False) -> bool: ... +def array_equiv(a1, a2) -> bool: ... + +# Utility functions +def copy(a, order='K') -> Array: ... +def nan_to_num(x, copy=True, nan=0.0, posinf=None, neginf=None) -> Array: ... +``` + +### Comparison Operations + +Element-wise comparison functions returning boolean arrays. + +```python { .api } +def equal(x1, x2) -> Array: ... +def not_equal(x1, x2) -> Array: ... +def less(x1, x2) -> Array: ... +def less_equal(x1, x2) -> Array: ... +def greater(x1, x2) -> Array: ... +def greater_equal(x1, x2) -> Array: ... +``` + +### Logical Operations + +Element-wise logical operations on boolean arrays. + +```python { .api } +def logical_and(x1, x2) -> Array: ... +def logical_or(x1, x2) -> Array: ... +def logical_not(x) -> Array: ... +def logical_xor(x1, x2) -> Array: ... +``` + +### Bitwise Operations + +Element-wise bitwise operations on integer arrays. + +```python { .api } +def bitwise_and(x1, x2) -> Array: ... +def bitwise_or(x1, x2) -> Array: ... +def bitwise_xor(x1, x2) -> Array: ... +def bitwise_not(x) -> Array: ... +def bitwise_left_shift(x1, x2) -> Array: ... +def bitwise_right_shift(x1, x2) -> Array: ... +def left_shift(x1, x2) -> Array: ... +def right_shift(x1, x2) -> Array: ... +def invert(x) -> Array: ... +def bitwise_count(x) -> Array: ... +``` + +### Constants and Special Values + +Mathematical and numerical constants. + +```python { .api } +pi: float # π (3.14159...) +e: float # Euler's number (2.71828...) 
+euler_gamma: float # Euler-Mascheroni constant +inf: float # Positive infinity +nan: float # Not a Number +newaxis: None # Used for adding dimensions in indexing +``` + +## NumPy Submodules + +### FFT Operations + +```python { .api } +import jax.numpy.fft as jfft + +# 1D transforms +jfft.fft(a, n=None, axis=-1, norm=None) -> Array +jfft.ifft(a, n=None, axis=-1, norm=None) -> Array +jfft.rfft(a, n=None, axis=-1, norm=None) -> Array +jfft.irfft(a, n=None, axis=-1, norm=None) -> Array + +# 2D transforms +jfft.fft2(a, s=None, axes=(-2, -1), norm=None) -> Array +jfft.ifft2(a, s=None, axes=(-2, -1), norm=None) -> Array +jfft.rfft2(a, s=None, axes=(-2, -1), norm=None) -> Array +jfft.irfft2(a, s=None, axes=(-2, -1), norm=None) -> Array + +# N-D transforms +jfft.fftn(a, s=None, axes=None, norm=None) -> Array +jfft.ifftn(a, s=None, axes=None, norm=None) -> Array +jfft.rfftn(a, s=None, axes=None, norm=None) -> Array +jfft.irfftn(a, s=None, axes=None, norm=None) -> Array + +# Helper functions +jfft.fftfreq(n, d=1.0) -> Array +jfft.rfftfreq(n, d=1.0) -> Array +jfft.fftshift(x, axes=None) -> Array +jfft.ifftshift(x, axes=None) -> Array +``` + +### Linear Algebra Operations + +```python { .api } +import jax.numpy.linalg as jla + +# Matrix decompositions +jla.cholesky(a) -> Array +jla.qr(a, mode='reduced') -> tuple[Array, Array] +jla.svd(a, full_matrices=True, compute_uv=True, hermitian=False) -> tuple[Array, Array, Array] +jla.eig(a) -> tuple[Array, Array] +jla.eigh(a, UPLO='L') -> tuple[Array, Array] +jla.eigvals(a) -> Array +jla.eigvalsh(a, UPLO='L') -> Array + +# Matrix properties +jla.det(a) -> Array +jla.slogdet(a) -> tuple[Array, Array] +jla.matrix_rank(M, tol=None, hermitian=False) -> Array +jla.trace(a, offset=0, axis1=0, axis2=1, dtype=None) -> Array + +# Matrix solutions +jla.solve(a, b) -> Array +jla.lstsq(a, b, rcond=None) -> tuple[Array, Array, Array, Array] +jla.inv(a) -> Array +jla.pinv(a, rcond=None, hermitian=False) -> Array + +# Norms and distances +jla.norm(x, ord=None, axis=None, keepdims=False) -> Array +jla.cond(x, p=None) -> Array + +# Matrix functions +jla.matrix_power(a, n) -> Array +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-jax/docs/random-numbers.md b/.tessl/tiles/tessl/pypi-jax/docs/random-numbers.md new file mode 100644 index 0000000..a34205c --- /dev/null +++ b/.tessl/tiles/tessl/pypi-jax/docs/random-numbers.md @@ -0,0 +1,803 @@ +# Random Number Generation + +JAX uses a functional approach to pseudo-random number generation with explicit key management. This design enables reproducibility, parallelization, and vectorization while avoiding global state typical of other libraries. + +## Core Imports + +```python +import jax.random as jr +from jax.random import key, split, normal, uniform +``` + +## Key Concepts + +JAX random numbers require explicit key management: +- Keys are created from integer seeds +- Keys must be split to generate independent random sequences +- Each random function consumes a key and returns deterministic output +- No global random state - all randomness is explicit + +## Capabilities + +### Key Management + +Generate, split, and manipulate PRNG keys for deterministic random number generation. + +```python { .api } +def key(seed: int, impl=None) -> Array: + """ + Create a typed PRNG key from integer seed. + + Args: + seed: Integer seed value + impl: PRNG implementation to use + + Returns: + PRNG key array + """ + +def PRNGKey(seed: int) -> Array: + """ + Create legacy PRNG key (uint32 format). 
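+
+    Kept for backward compatibility; prefer key() for new code, which returns
+    a typed key array.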
+ + Args: + seed: Integer seed value + + Returns: + Legacy format PRNG key + """ + +def split(key: Array, num: int = 2) -> Array: + """ + Split PRNG key into multiple independent keys. + + Args: + key: PRNG key to split + num: Number of keys to generate (default: 2) + + Returns: + Array of shape (num,) + key.shape containing new keys + """ + +def fold_in(key: Array, data: int) -> Array: + """ + Fold integer data into PRNG key. + + Args: + key: PRNG key + data: Integer to fold into key + + Returns: + New PRNG key with data folded in + """ + +def clone(key: Array) -> Array: + """ + Clone PRNG key for reuse. + + Args: + key: PRNG key to clone + + Returns: + Cloned PRNG key + """ + +def key_data(keys: Array) -> Array: + """ + Extract raw key data from PRNG keys. + + Args: + keys: PRNG key array + + Returns: + Raw key data + """ + +def wrap_key_data(key_data: Array, *, impl=None) -> Array: + """ + Wrap raw key data as PRNG keys. + + Args: + key_data: Raw key data + impl: PRNG implementation + + Returns: + PRNG key array + """ + +def key_impl(key: Array) -> str: + """ + Get PRNG implementation name for key. + + Args: + key: PRNG key + + Returns: + Implementation name string + """ +``` + +### Continuous Distributions + +Sample from continuous probability distributions. + +```python { .api } +def uniform( + key: Array, + shape=(), + dtype=float, + minval=0.0, + maxval=1.0 +) -> Array: + """ + Sample from uniform distribution. + + Args: + key: PRNG key + shape: Output shape + dtype: Output data type + minval: Minimum value (inclusive) + maxval: Maximum value (exclusive) + + Returns: + Random samples from uniform distribution + """ + +def normal(key: Array, shape=(), dtype=float) -> Array: + """ + Sample from standard normal (Gaussian) distribution. + + Args: + key: PRNG key + shape: Output shape + dtype: Output data type + + Returns: + Random samples from N(0, 1) + """ + +def multivariate_normal( + key: Array, + mean: Array, + cov: Array, + shape=(), + dtype=float, + method='cholesky' +) -> Array: + """ + Sample from multivariate normal distribution. + + Args: + key: PRNG key + mean: Mean vector + cov: Covariance matrix + shape: Batch shape + dtype: Output data type + method: Decomposition method ('cholesky', 'eigh', 'svd') + + Returns: + Random samples from multivariate normal + """ + +def truncated_normal( + key: Array, + lower: float, + upper: float, + shape=(), + dtype=float +) -> Array: + """ + Sample from truncated normal distribution. + + Args: + key: PRNG key + lower: Lower truncation bound + upper: Upper truncation bound + shape: Output shape + dtype: Output data type + + Returns: + Random samples from truncated normal + """ + +def beta(key: Array, a: Array, b: Array, shape=(), dtype=float) -> Array: + """ + Sample from beta distribution. + + Args: + key: PRNG key + a: Alpha parameter (concentration) + b: Beta parameter (concentration) + shape: Output shape + dtype: Output data type + + Returns: + Random samples from Beta(a, b) + """ + +def gamma(key: Array, a: Array, shape=(), dtype=float) -> Array: + """ + Sample from gamma distribution. + + Args: + key: PRNG key + a: Shape parameter + shape: Output shape + dtype: Output data type + + Returns: + Random samples from Gamma(a, 1) + """ + +def exponential(key: Array, shape=(), dtype=float) -> Array: + """ + Sample from exponential distribution. 
+ + Args: + key: PRNG key + shape: Output shape + dtype: Output data type + + Returns: + Random samples from Exponential(1) + """ + +def laplace(key: Array, shape=(), dtype=float) -> Array: + """ + Sample from Laplace distribution. + + Args: + key: PRNG key + shape: Output shape + dtype: Output data type + + Returns: + Random samples from Laplace(0, 1) + """ + +def logistic(key: Array, shape=(), dtype=float) -> Array: + """ + Sample from logistic distribution. + + Args: + key: PRNG key + shape: Output shape + dtype: Output data type + + Returns: + Random samples from Logistic(0, 1) + """ + +def lognormal(key: Array, sigma=1.0, shape=(), dtype=float) -> Array: + """ + Sample from log-normal distribution. + + Args: + key: PRNG key + sigma: Standard deviation of underlying normal + shape: Output shape + dtype: Output data type + + Returns: + Random samples from log-normal distribution + """ + +def pareto(key: Array, b: Array, shape=(), dtype=float) -> Array: + """ + Sample from Pareto distribution. + + Args: + key: PRNG key + b: Shape parameter + shape: Output shape + dtype: Output data type + + Returns: + Random samples from Pareto(b, 1) + """ + +def cauchy(key: Array, shape=(), dtype=float) -> Array: + """ + Sample from Cauchy distribution. + + Args: + key: PRNG key + shape: Output shape + dtype: Output data type + + Returns: + Random samples from Cauchy(0, 1) + """ + +def double_sided_maxwell( + key: Array, + loc: Array, + scale: Array, + shape=(), + dtype=float +) -> Array: + """ + Sample from double-sided Maxwell distribution. + + Args: + key: PRNG key + loc: Location parameter + scale: Scale parameter + shape: Output shape + dtype: Output data type + + Returns: + Random samples from double-sided Maxwell + """ + +def maxwell(key: Array, shape=(), dtype=float) -> Array: + """ + Sample from Maxwell distribution. + + Args: + key: PRNG key + shape: Output shape + dtype: Output data type + + Returns: + Random samples from Maxwell distribution + """ + +def rayleigh(key: Array, scale=1.0, shape=(), dtype=float) -> Array: + """ + Sample from Rayleigh distribution. + + Args: + key: PRNG key + scale: Scale parameter + shape: Output shape + dtype: Output data type + + Returns: + Random samples from Rayleigh(scale) + """ + +def wald(key: Array, mean: Array, shape=(), dtype=float) -> Array: + """ + Sample from Wald (Inverse Gaussian) distribution. + + Args: + key: PRNG key + mean: Mean parameter + shape: Output shape + dtype: Output data type + + Returns: + Random samples from Wald distribution + """ + +def weibull_min( + key: Array, + concentration: Array, + scale=1.0, + shape=(), + dtype=float +) -> Array: + """ + Sample from Weibull minimum distribution. + + Args: + key: PRNG key + concentration: Shape parameter + scale: Scale parameter + shape: Output shape + dtype: Output data type + + Returns: + Random samples from Weibull minimum + """ + +def gumbel(key: Array, shape=(), dtype=float) -> Array: + """ + Sample from Gumbel distribution. + + Args: + key: PRNG key + shape: Output shape + dtype: Output data type + + Returns: + Random samples from Gumbel(0, 1) + """ + +def chisquare(key: Array, df: Array, shape=(), dtype=float) -> Array: + """ + Sample from chi-square distribution. + + Args: + key: PRNG key + df: Degrees of freedom + shape: Output shape + dtype: Output data type + + Returns: + Random samples from chi-square(df) + """ + +def dirichlet( + key: Array, + alpha: Array, + shape=(), + dtype=float +) -> Array: + """ + Sample from Dirichlet distribution. 
+ + Args: + key: PRNG key + alpha: Concentration parameters + shape: Batch shape + dtype: Output data type + + Returns: + Random samples from Dirichlet(alpha) + """ + +def f(key: Array, dfnum: Array, dfden: Array, shape=(), dtype=float) -> Array: + """ + Sample from F-distribution. + + Args: + key: PRNG key + dfnum: Numerator degrees of freedom + dfden: Denominator degrees of freedom + shape: Output shape + dtype: Output data type + + Returns: + Random samples from F-distribution + """ + +def t(key: Array, df: Array, shape=(), dtype=float) -> Array: + """ + Sample from Student's t-distribution. + + Args: + key: PRNG key + df: Degrees of freedom + shape: Output shape + dtype: Output data type + + Returns: + Random samples from t-distribution + """ + +def triangular( + key: Array, + left: Array, + mode: Array, + right: Array, + shape=(), + dtype=float +) -> Array: + """ + Sample from triangular distribution. + + Args: + key: PRNG key + left: Left boundary + mode: Mode (peak) value + right: Right boundary + shape: Output shape + dtype: Output data type + + Returns: + Random samples from triangular distribution + """ + +def generalized_normal( + key: Array, + p: Array, + shape=(), + dtype=float +) -> Array: + """ + Sample from generalized normal distribution. + + Args: + key: PRNG key + p: Shape parameter + shape: Output shape + dtype: Output data type + + Returns: + Random samples from generalized normal + """ + +def loggamma(key: Array, a: Array, shape=(), dtype=float) -> Array: + """ + Sample log-gamma random variables. + + Args: + key: PRNG key + a: Shape parameter + shape: Output shape + dtype: Output data type + + Returns: + Random samples from log-gamma distribution + """ +``` + +### Discrete Distributions + +Sample from discrete probability distributions. + +```python { .api } +def bernoulli(key: Array, p=0.5, shape=(), dtype=int) -> Array: + """ + Sample from Bernoulli distribution. + + Args: + key: PRNG key + p: Success probability + shape: Output shape + dtype: Output data type + + Returns: + Random samples from Bernoulli(p) + """ + +def binomial(key: Array, n: Array, p: Array, shape=(), dtype=int) -> Array: + """ + Sample from binomial distribution. + + Args: + key: PRNG key + n: Number of trials + p: Success probability per trial + shape: Output shape + dtype: Output data type + + Returns: + Random samples from Binomial(n, p) + """ + +def categorical( + key: Array, + logits: Array, + axis=-1, + shape=None +) -> Array: + """ + Sample from categorical distribution. + + Args: + key: PRNG key + logits: Log-probability array + axis: Axis over which to normalize + shape: Output shape + + Returns: + Random categorical indices + """ + +def choice( + key: Array, + a: int | Array, + shape=(), + replace=True, + p=None, + axis=0 +) -> Array: + """ + Random choice from array elements. + + Args: + key: PRNG key + a: Array to sample from or integer (range) + shape: Output shape + replace: Whether to sample with replacement + p: Probabilities for each element + axis: Axis to sample along + + Returns: + Random samples from input array + """ + +def geometric(key: Array, p: Array, shape=(), dtype=int) -> Array: + """ + Sample from geometric distribution. + + Args: + key: PRNG key + p: Success probability + shape: Output shape + dtype: Output data type + + Returns: + Random samples from Geometric(p) + """ + +def poisson(key: Array, lam: Array, shape=(), dtype=int) -> Array: + """ + Sample from Poisson distribution. 
+ + Args: + key: PRNG key + lam: Rate parameter + shape: Output shape + dtype: Output data type + + Returns: + Random samples from Poisson(lam) + """ + +def multinomial( + key: Array, + n: Array, + pvals: Array, + shape=(), + dtype=int +) -> Array: + """ + Sample from multinomial distribution. + + Args: + key: PRNG key + n: Number of trials + pvals: Probability values for each category + shape: Batch shape + dtype: Output data type + + Returns: + Random samples from Multinomial(n, pvals) + """ + +def randint( + key: Array, + minval: int, + maxval: int, + shape=(), + dtype=int +) -> Array: + """ + Sample random integers from [minval, maxval). + + Args: + key: PRNG key + minval: Minimum value (inclusive) + maxval: Maximum value (exclusive) + shape: Output shape + dtype: Output data type + + Returns: + Random integers in specified range + """ + +def rademacher(key: Array, shape=(), dtype=int) -> Array: + """ + Sample from Rademacher distribution (±1 with equal probability). + + Args: + key: PRNG key + shape: Output shape + dtype: Output data type + + Returns: + Random samples from {-1, +1} + """ +``` + +### Specialized Sampling + +Special sampling functions for geometric shapes and structured sampling. + +```python { .api } +def ball(key: Array, d: int, p=2, shape=(), dtype=float) -> Array: + """ + Sample uniformly from d-dimensional unit ball. + + Args: + key: PRNG key + d: Dimension of ball + p: Norm type (default: 2 for Euclidean) + shape: Batch shape + dtype: Output data type + + Returns: + Random samples from unit ball + """ + +def orthogonal(key: Array, n: int, shape=(), dtype=float) -> Array: + """ + Sample random orthogonal matrix. + + Args: + key: PRNG key + n: Matrix dimension + shape: Batch shape + dtype: Output data type + + Returns: + Random orthogonal matrix of size (n, n) + """ + +def permutation(key: Array, x: int | Array, axis=0, independent=False) -> Array: + """ + Generate random permutation of array or integers. + + Args: + key: PRNG key + x: Array to permute or integer (range) + axis: Axis to permute along + independent: Whether to permute each batch element independently + + Returns: + Randomly permuted array + """ + +def bits(key: Array, width=64, shape=(), dtype=None) -> Array: + """ + Generate random bits. 
+ + Args: + key: PRNG key + width: Number of bits per sample + shape: Output shape + dtype: Output data type + + Returns: + Random bit patterns + """ +``` + +## Usage Examples + +Common patterns for JAX random number generation: + +```python +import jax +import jax.numpy as jnp +import jax.random as jr + +# Create and split keys +main_key = jr.key(42) +key1, key2, key3 = jr.split(main_key, 3) + +# Basic sampling +samples = jr.normal(key1, (1000,)) +random_ints = jr.randint(key2, 0, 10, (100,)) + +# Batch sampling with same key +batch_samples = jr.normal(key3, (32, 784)) # 32 samples of 784 dims + +# Different keys for each batch element +keys = jr.split(main_key, 32) +independent_samples = jax.vmap( + lambda k: jr.normal(k, (784,)) +)(keys) + +# Random choice and permutation +data = jnp.arange(100) +shuffled = jr.permutation(key1, data) +selected = jr.choice(key2, data, (10,), replace=False) + +# Multivariate distributions +mean = jnp.zeros(5) +cov = jnp.eye(5) +mv_samples = jr.multivariate_normal(key1, mean, cov, (1000,)) + +# Discrete distributions +coin_flips = jr.bernoulli(key1, 0.6, (100,)) +dice_rolls = jr.categorical(key2, jnp.log(jnp.ones(6) / 6), (100,)) + +# Using in neural network initialization +def init_layer_weights(key, input_dim, output_dim): + w_key, b_key = jr.split(key) + # Xavier/Glorot initialization + std = jnp.sqrt(2.0 / (input_dim + output_dim)) + weights = jr.normal(w_key, (input_dim, output_dim)) * std + biases = jr.normal(b_key, (output_dim,)) * 0.01 + return weights, biases + +# Stochastic gradient descent with random batching +def get_random_batch(key, data, batch_size): + indices = jr.choice(key, len(data), (batch_size,), replace=False) + return data[indices] +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-jax/docs/scipy-compatibility.md b/.tessl/tiles/tessl/pypi-jax/docs/scipy-compatibility.md new file mode 100644 index 0000000..1c7bfc3 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-jax/docs/scipy-compatibility.md @@ -0,0 +1,807 @@ +# SciPy Compatibility + +JAX provides SciPy-compatible functions through `jax.scipy` for scientific computing including linear algebra, signal processing, special functions, statistics, and sparse operations. These functions are differentiable and can be JIT-compiled. + +## Core Imports + +```python +import jax.scipy as jsp +import jax.scipy.linalg as jla +import jax.scipy.special as jss +import jax.scipy.stats as jst +``` + +## Capabilities + +### Linear Algebra (`jax.scipy.linalg`) + +Advanced linear algebra operations for matrix computations and decompositions. + +```python { .api } +# Matrix decompositions +def cholesky(a, lower=True) -> Array: + """ + Cholesky decomposition of positive definite matrix. + + Args: + a: Positive definite matrix to decompose + lower: Whether to return lower triangular factor + + Returns: + Cholesky factor L such that a = L @ L.T (or U.T @ U if upper) + """ + +def qr(a, mode='reduced') -> tuple[Array, Array]: + """ + QR decomposition of matrix. + + Args: + a: Matrix to decompose + mode: 'reduced' or 'complete' decomposition + + Returns: + Tuple (Q, R) where Q is orthogonal and R is upper triangular + """ + +def svd(a, full_matrices=True, compute_uv=True, hermitian=False) -> tuple[Array, Array, Array]: + """ + Singular Value Decomposition. 
+ + Args: + a: Matrix to decompose + full_matrices: Whether to compute full or reduced SVD + compute_uv: Whether to compute U and V matrices + hermitian: Whether matrix is Hermitian + + Returns: + Tuple (U, s, Vh) where a = U @ diag(s) @ Vh + """ + +def eig(a, b=None, left=False, right=True, overwrite_a=False, overwrite_b=False, + check_finite=True, homogeneous_eigvals=False) -> tuple[Array, Array]: + """ + Eigenvalues and eigenvectors of general matrix. + + Args: + a: Square matrix + b: Optional matrix for generalized eigenvalue problem + left: Whether to compute left eigenvectors + right: Whether to compute right eigenvectors + overwrite_a: Whether input can be overwritten + overwrite_b: Whether b can be overwritten + check_finite: Whether to check for finite values + homogeneous_eigvals: Whether to return homogeneous eigenvalues + + Returns: + Tuple (eigenvalues, eigenvectors) + """ + +def eigh(a, b=None, lower=True, eigvals_only=False, overwrite_a=False, + overwrite_b=False, turbo=True, eigvals=None, type=1, + check_finite=True) -> tuple[Array, Array]: + """ + Eigenvalues and eigenvectors of Hermitian matrix. + + Args: + a: Hermitian matrix + b: Optional matrix for generalized problem + lower: Whether to use lower triangle + eigvals_only: Whether to compute eigenvalues only + overwrite_a: Whether input can be overwritten + overwrite_b: Whether b can be overwritten + turbo: Whether to use turbo algorithm + eigvals: Range of eigenvalue indices to compute + type: Type of generalized eigenvalue problem + check_finite: Whether to check for finite values + + Returns: + Eigenvalues (and eigenvectors if eigvals_only=False) + """ + +def eigvals(a, b=None, overwrite_a=False, check_finite=True, + homogeneous_eigvals=False) -> Array: + """Eigenvalues of general matrix.""" + +def eigvalsh(a, b=None, lower=True, overwrite_a=False, overwrite_b=False, + turbo=True, eigvals=None, type=1, check_finite=True) -> Array: + """Eigenvalues of Hermitian matrix.""" + +# Matrix properties and functions +def det(a) -> Array: + """Matrix determinant.""" + +def slogdet(a) -> tuple[Array, Array]: + """Sign and log determinant of matrix.""" + +def logdet(a) -> Array: + """Log determinant of matrix.""" + +def matrix_rank(M, tol=None, hermitian=False) -> Array: + """Matrix rank computation.""" + +def trace(a, offset=0, axis1=0, axis2=1) -> Array: + """Matrix trace.""" + +def norm(a, ord=None, axis=None, keepdims=False) -> Array: + """Matrix or vector norm.""" + +def cond(x, p=None) -> Array: + """Condition number of matrix.""" + +# Matrix solutions +def solve(a, b, assume_a='gen', lower=False, overwrite_a=False, + overwrite_b=False, debug=None, check_finite=True) -> Array: + """ + Solve linear system Ax = b. 
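+
+    Note: the overwrite_* and check_finite arguments are accepted for SciPy
+    API compatibility but are unused by JAX (arrays are immutable).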
+ + Args: + a: Coefficient matrix + b: Right-hand side vector/matrix + assume_a: Properties of matrix a ('gen', 'sym', 'her', 'pos') + lower: Whether to use lower triangle for triangular matrices + overwrite_a: Whether input can be overwritten + overwrite_b: Whether b can be overwritten + debug: Debug information level + check_finite: Whether to check for finite values + + Returns: + Solution x such that Ax = b + """ + +def solve_triangular(a, b, trans=0, lower=False, unit_diagonal=False, + overwrite_b=False, debug=None, check_finite=True) -> Array: + """Solve triangular linear system.""" + +def inv(a, overwrite_a=False, check_finite=True) -> Array: + """Matrix inverse.""" + +def pinv(a, rcond=None, hermitian=False, return_rank=False) -> Array: + """Moore-Penrose pseudoinverse.""" + +def lstsq(a, b, rcond=None, lapack_driver=None) -> tuple[Array, Array, Array, Array]: + """ + Least-squares solution to linear system. + + Args: + a: Coefficient matrix + b: Dependent variable values + rcond: Cutoff ratio for small singular values + lapack_driver: LAPACK driver to use + + Returns: + Tuple (solution, residuals, rank, singular_values) + """ + +# Matrix functions +def expm(A) -> Array: + """Matrix exponential.""" + +def funm(A, func, disp=True) -> Array: + """General matrix function evaluation.""" + +def sqrtm(A, disp=True, blocksize=64) -> Array: + """Matrix square root.""" + +def logm(A, disp=True) -> Array: + """Matrix logarithm.""" + +def fractional_matrix_power(A, t) -> Array: + """Fractional matrix power A^t.""" + +def matrix_power(A, n) -> Array: + """Integer matrix power A^n.""" + +# Schur decomposition +def schur(a, output='real') -> tuple[Array, Array]: + """Schur decomposition of matrix.""" + +def rsf2csf(T, Z) -> tuple[Array, Array]: + """Convert real Schur form to complex Schur form.""" + +# Polar decomposition +def polar(a, side='right') -> tuple[Array, Array]: + """Polar decomposition of matrix.""" +``` + +### Special Functions (`jax.scipy.special`) + +Special mathematical functions including error functions, gamma functions, and Bessel functions. 
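+
+A minimal illustrative sketch (restricted to functions listed below), showing that
+these follow SciPy semantics and compose with JAX transformations such as `jax.grad`:
+
+```python
+import jax
+import jax.numpy as jnp
+from jax.scipy.special import erf, gammaln
+
+# log(n!) via the log-gamma function: gammaln(n + 1) == log(n!)
+log_factorial_5 = gammaln(6.0)        # jnp.log(120.0)
+
+# Special functions are differentiable like any other JAX operation
+d_erf_at_zero = jax.grad(erf)(0.0)    # 2 / jnp.sqrt(jnp.pi) ~ 1.1284
+```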
+ +```python { .api } +# Error functions +def erf(z) -> Array: + """Error function.""" + +def erfc(x) -> Array: + """Complementary error function.""" + +def erfinv(y) -> Array: + """Inverse error function.""" + +def erfcinv(y) -> Array: + """Inverse complementary error function.""" + +def wofz(z) -> Array: + """Faddeeva function.""" + +# Gamma functions +def gamma(z) -> Array: + """Gamma function.""" + +def gammaln(x) -> Array: + """Log gamma function.""" + +def digamma(x) -> Array: + """Digamma (psi) function.""" + +def polygamma(n, x) -> Array: + """Polygamma function.""" + +def gammainc(a, x) -> Array: + """Lower incomplete gamma function.""" + +def gammaincc(a, x) -> Array: + """Upper incomplete gamma function.""" + +def gammasgn(x) -> Array: + """Sign of gamma function.""" + +def rgamma(x) -> Array: + """Reciprocal gamma function.""" + +# Beta functions +def beta(a, b) -> Array: + """Beta function.""" + +def betaln(a, b) -> Array: + """Log beta function.""" + +def betainc(a, b, x) -> Array: + """Incomplete beta function.""" + +# Bessel functions +def j0(x) -> Array: + """Bessel function of the first kind of order 0.""" + +def j1(x) -> Array: + """Bessel function of the first kind of order 1.""" + +def jn(n, x) -> Array: + """Bessel function of the first kind of order n.""" + +def y0(x) -> Array: + """Bessel function of the second kind of order 0.""" + +def y1(x) -> Array: + """Bessel function of the second kind of order 1.""" + +def yn(n, x) -> Array: + """Bessel function of the second kind of order n.""" + +def i0(x) -> Array: + """Modified Bessel function of the first kind of order 0.""" + +def i0e(x) -> Array: + """Exponentially scaled modified Bessel function i0.""" + +def i1(x) -> Array: + """Modified Bessel function of the first kind of order 1.""" + +def i1e(x) -> Array: + """Exponentially scaled modified Bessel function i1.""" + +def iv(v, z) -> Array: + """Modified Bessel function of the first kind of real order.""" + +def k0(x) -> Array: + """Modified Bessel function of the second kind of order 0.""" + +def k0e(x) -> Array: + """Exponentially scaled modified Bessel function k0.""" + +def k1(x) -> Array: + """Modified Bessel function of the second kind of order 1.""" + +def k1e(x) -> Array: + """Exponentially scaled modified Bessel function k1.""" + +def kv(v, z) -> Array: + """Modified Bessel function of the second kind of real order.""" + +# Exponential integrals +def expi(x) -> Array: + """Exponential integral Ei.""" + +def expn(n, x) -> Array: + """Generalized exponential integral.""" + +# Log-sum-exp and related +def logsumexp(a, axis=None, b=None, keepdims=False, return_sign=False) -> Array: + """ + Compute log(sum(exp(a))) in numerically stable way. 
+ + Args: + a: Input array + axis: Axis to sum over + b: Multiplier for each element + keepdims: Whether to keep reduced dimensions + return_sign: Whether to return sign separately + + Returns: + Log-sum-exp result + """ + +def softmax(x, axis=None) -> Array: + """Softmax function.""" + +def log_softmax(x, axis=None) -> Array: + """Log softmax function.""" + +# Combinatorial functions +def factorial(n, exact=False) -> Array: + """Factorial function.""" + +def factorial2(n, exact=False) -> Array: + """Double factorial function.""" + +def factorialk(n, k, exact=False) -> Array: + """Multifactorial function.""" + +def comb(N, k, exact=False, repetition=False) -> Array: + """Binomial coefficient.""" + +def perm(N, k, exact=False) -> Array: + """Permutation coefficient.""" + +# Elliptic integrals +def ellipk(m) -> Array: + """Complete elliptic integral of the first kind.""" + +def ellipe(m) -> Array: + """Complete elliptic integral of the second kind.""" + +def ellipkinc(phi, m) -> Array: + """Incomplete elliptic integral of the first kind.""" + +def ellipeinc(phi, m) -> Array: + """Incomplete elliptic integral of the second kind.""" + +# Zeta and related functions +def zeta(x, q=None) -> Array: + """Riemann or Hurwitz zeta function.""" + +def zetac(x) -> Array: + """Riemann zeta function minus 1.""" + +# Hypergeometric functions +def hyp1f1(a, b, x) -> Array: + """Confluent hypergeometric function 1F1.""" + +def hyp2f1(a, b, c, z) -> Array: + """Gaussian hypergeometric function 2F1.""" + +def hyperu(a, b, x) -> Array: + """Confluent hypergeometric function U.""" + +# Legendre functions +def legendre(n, x) -> Array: + """Legendre polynomial.""" + +def lpmv(m, v, x) -> Array: + """Associated Legendre function.""" + +# Spherical functions +def sph_harm(m, n, theta, phi) -> Array: + """Spherical harmonics.""" + +# Other special functions +def lambertw(z, k=0, tol=1e-8) -> Array: + """Lambert W function.""" + +def spence(z) -> Array: + """Spence function.""" + +def multigammaln(a, d) -> Array: + """Log of multivariate gamma function.""" + +def entr(x) -> Array: + """Elementwise function -x*log(x).""" + +def kl_div(x, y) -> Array: + """Elementwise function x*log(x/y) - x + y.""" + +def rel_entr(x, y) -> Array: + """Elementwise function x*log(x/y).""" + +def huber(delta, r) -> Array: + """Huber loss function.""" + +def pseudo_huber(delta, r) -> Array: + """Pseudo-Huber loss function.""" +``` + +### Statistics (`jax.scipy.stats`) + +Statistical distributions and functions for probability and hypothesis testing. + +```python { .api } +# Continuous distributions +class norm: + """Normal distribution.""" + @staticmethod + def pdf(x, loc=0, scale=1) -> Array: ... + @staticmethod + def logpdf(x, loc=0, scale=1) -> Array: ... + @staticmethod + def cdf(x, loc=0, scale=1) -> Array: ... + @staticmethod + def logcdf(x, loc=0, scale=1) -> Array: ... + @staticmethod + def sf(x, loc=0, scale=1) -> Array: ... + @staticmethod + def logsf(x, loc=0, scale=1) -> Array: ... + @staticmethod + def ppf(q, loc=0, scale=1) -> Array: ... + @staticmethod + def isf(q, loc=0, scale=1) -> Array: ... + +class multivariate_normal: + """Multivariate normal distribution.""" + @staticmethod + def pdf(x, mean=None, cov=1, allow_singular=False) -> Array: ... + @staticmethod + def logpdf(x, mean=None, cov=1, allow_singular=False) -> Array: ... + +class uniform: + """Uniform distribution.""" + @staticmethod + def pdf(x, loc=0, scale=1) -> Array: ... + @staticmethod + def logpdf(x, loc=0, scale=1) -> Array: ... 
+ @staticmethod + def cdf(x, loc=0, scale=1) -> Array: ... + @staticmethod + def logcdf(x, loc=0, scale=1) -> Array: ... + @staticmethod + def sf(x, loc=0, scale=1) -> Array: ... + @staticmethod + def logsf(x, loc=0, scale=1) -> Array: ... + @staticmethod + def ppf(q, loc=0, scale=1) -> Array: ... + +class beta: + """Beta distribution.""" + @staticmethod + def pdf(x, a, b, loc=0, scale=1) -> Array: ... + @staticmethod + def logpdf(x, a, b, loc=0, scale=1) -> Array: ... + @staticmethod + def cdf(x, a, b, loc=0, scale=1) -> Array: ... + +class gamma: + """Gamma distribution.""" + @staticmethod + def pdf(x, a, loc=0, scale=1) -> Array: ... + @staticmethod + def logpdf(x, a, loc=0, scale=1) -> Array: ... + @staticmethod + def cdf(x, a, loc=0, scale=1) -> Array: ... + +class chi2: + """Chi-square distribution.""" + @staticmethod + def pdf(x, df, loc=0, scale=1) -> Array: ... + @staticmethod + def logpdf(x, df, loc=0, scale=1) -> Array: ... + @staticmethod + def cdf(x, df, loc=0, scale=1) -> Array: ... + +class t: + """Student's t-distribution.""" + @staticmethod + def pdf(x, df, loc=0, scale=1) -> Array: ... + @staticmethod + def logpdf(x, df, loc=0, scale=1) -> Array: ... + @staticmethod + def cdf(x, df, loc=0, scale=1) -> Array: ... + +class f: + """F-distribution.""" + @staticmethod + def pdf(x, dfn, dfd, loc=0, scale=1) -> Array: ... + @staticmethod + def logpdf(x, dfn, dfd, loc=0, scale=1) -> Array: ... + @staticmethod + def cdf(x, dfn, dfd, loc=0, scale=1) -> Array: ... + +class laplace: + """Laplace distribution.""" + @staticmethod + def pdf(x, loc=0, scale=1) -> Array: ... + @staticmethod + def logpdf(x, loc=0, scale=1) -> Array: ... + @staticmethod + def cdf(x, loc=0, scale=1) -> Array: ... + +class logistic: + """Logistic distribution.""" + @staticmethod + def pdf(x, loc=0, scale=1) -> Array: ... + @staticmethod + def logpdf(x, loc=0, scale=1) -> Array: ... + @staticmethod + def cdf(x, loc=0, scale=1) -> Array: ... + +class pareto: + """Pareto distribution.""" + @staticmethod + def pdf(x, b, loc=0, scale=1) -> Array: ... + @staticmethod + def logpdf(x, b, loc=0, scale=1) -> Array: ... + @staticmethod + def cdf(x, b, loc=0, scale=1) -> Array: ... + +class expon: + """Exponential distribution.""" + @staticmethod + def pdf(x, loc=0, scale=1) -> Array: ... + @staticmethod + def logpdf(x, loc=0, scale=1) -> Array: ... + @staticmethod + def cdf(x, loc=0, scale=1) -> Array: ... + +class lognorm: + """Log-normal distribution.""" + @staticmethod + def pdf(x, s, loc=0, scale=1) -> Array: ... + @staticmethod + def logpdf(x, s, loc=0, scale=1) -> Array: ... + @staticmethod + def cdf(x, s, loc=0, scale=1) -> Array: ... + +class truncnorm: + """Truncated normal distribution.""" + @staticmethod + def pdf(x, a, b, loc=0, scale=1) -> Array: ... + @staticmethod + def logpdf(x, a, b, loc=0, scale=1) -> Array: ... + @staticmethod + def cdf(x, a, b, loc=0, scale=1) -> Array: ... + +# Discrete distributions +class bernoulli: + """Bernoulli distribution.""" + @staticmethod + def pmf(k, p, loc=0) -> Array: ... + @staticmethod + def logpmf(k, p, loc=0) -> Array: ... + @staticmethod + def cdf(k, p, loc=0) -> Array: ... + +class binom: + """Binomial distribution.""" + @staticmethod + def pmf(k, n, p, loc=0) -> Array: ... + @staticmethod + def logpmf(k, n, p, loc=0) -> Array: ... + @staticmethod + def cdf(k, n, p, loc=0) -> Array: ... + +class geom: + """Geometric distribution.""" + @staticmethod + def pmf(k, p, loc=0) -> Array: ... + @staticmethod + def logpmf(k, p, loc=0) -> Array: ... 
+ @staticmethod + def cdf(k, p, loc=0) -> Array: ... + +class nbinom: + """Negative binomial distribution.""" + @staticmethod + def pmf(k, n, p, loc=0) -> Array: ... + @staticmethod + def logpmf(k, n, p, loc=0) -> Array: ... + @staticmethod + def cdf(k, n, p, loc=0) -> Array: ... + +class poisson: + """Poisson distribution.""" + @staticmethod + def pmf(k, mu, loc=0) -> Array: ... + @staticmethod + def logpmf(k, mu, loc=0) -> Array: ... + @staticmethod + def cdf(k, mu, loc=0) -> Array: ... + +# Statistical functions +def mode(a, axis=0, nan_policy='propagate', keepdims=False) -> Array: + """Mode of array values along axis.""" + +def rankdata(a, method='average', axis=None) -> Array: + """Rank data along axis.""" + +def kendalltau(x, y, initial_lexsort=None, nan_policy='propagate', method='auto') -> tuple[Array, Array]: + """Kendall's tau correlation coefficient.""" + +def pearsonr(x, y) -> tuple[Array, Array]: + """Pearson correlation coefficient.""" + +def spearmanr(a, b=None, axis=0, nan_policy='propagate', alternative='two-sided') -> tuple[Array, Array]: + """Spearman correlation coefficient.""" +``` + +### Signal Processing (`jax.scipy.signal`) + +Signal processing functions for filtering, convolution, and spectral analysis. + +```python { .api } +def convolve(in1, in2, mode='full', method='auto') -> Array: + """N-dimensional convolution.""" + +def convolve2d(in1, in2, mode='full', boundary='fill', fillvalue=0) -> Array: + """2D convolution.""" + +def correlate(in1, in2, mode='full', method='auto') -> Array: + """Cross-correlation of two arrays.""" + +def correlate2d(in1, in2, mode='full', boundary='fill', fillvalue=0) -> Array: + """2D cross-correlation.""" + +def fftconvolve(in1, in2, mode='full', axes=None) -> Array: + """FFT-based convolution.""" + +def oaconvolve(in1, in2, mode='full', axes=None) -> Array: + """Overlap-add convolution.""" + +def lfilter(b, a, x, axis=-1, zi=None) -> Array: + """Linear digital filter.""" + +def filtfilt(b, a, x, axis=-1, padtype='odd', padlen=None, method='pad', irlen=None) -> Array: + """Zero-phase digital filtering.""" + +def sosfilt(sos, x, axis=-1, zi=None) -> Array: + """Filter using second-order sections.""" + +def sosfiltfilt(sos, x, axis=-1, padtype='odd', padlen=None) -> Array: + """Zero-phase filtering with second-order sections.""" + +def hilbert(x, N=None, axis=-1) -> Array: + """Hilbert transform.""" + +def hilbert2(x, N=None) -> Array: + """2D Hilbert transform.""" + +def decimate(x, q, n=None, ftype='iir', axis=-1, zero_phase=True) -> Array: + """Downsample signal by integer factor.""" + +def resample(x, num, t=None, axis=0, window=None, domain='time') -> Array: + """Resample signal to new sample rate.""" + +def resample_poly(x, up, down, axis=0, window='kaiser', padtype='constant', cval=None) -> Array: + """Resample using polyphase filtering.""" + +def upfirdn(h, x, up=1, down=1, axis=-1, mode='constant', cval=0) -> Array: + """Upsample, FIR filter, and downsample.""" + +def periodogram(x, fs=1.0, window='boxcar', nfft=None, detrend='constant', + return_onesided=True, scaling='density', axis=-1) -> tuple[Array, Array]: + """Periodogram power spectral density.""" + +def welch(x, fs=1.0, window='hann', nperseg=None, noverlap=None, nfft=None, + detrend='constant', return_onesided=True, scaling='density', axis=-1, + average='mean') -> tuple[Array, Array]: + """Welch's method for power spectral density.""" + +def csd(x, y, fs=1.0, window='hann', nperseg=None, noverlap=None, nfft=None, + detrend='constant', return_onesided=True, 
scaling='density', axis=-1, + average='mean') -> tuple[Array, Array]: + """Cross power spectral density.""" + +def coherence(x, y, fs=1.0, window='hann', nperseg=None, noverlap=None, nfft=None, + detrend='constant', axis=-1) -> tuple[Array, Array]: + """Coherence between signals.""" + +def spectrogram(x, fs=1.0, window='tukey', nperseg=None, noverlap=None, nfft=None, + detrend='constant', return_onesided=True, scaling='density', axis=-1, + mode='psd') -> tuple[Array, Array, Array]: + """Spectrogram using short-time Fourier transform.""" + +def stft(x, fs=1.0, window='hann', nperseg=256, noverlap=None, nfft=None, + detrend=False, return_onesided=True, boundary='zeros', padded=True, axis=-1) -> tuple[Array, Array, Array]: + """Short-time Fourier transform.""" + +def istft(Zxx, fs=1.0, window='hann', nperseg=None, noverlap=None, nfft=None, + input_onesided=True, boundary=True, time_axis=-1, freq_axis=-2) -> tuple[Array, Array]: + """Inverse short-time Fourier transform.""" + +def lombscargle(x, y, freqs, precenter=False, normalize=False) -> Array: + """Lomb-Scargle periodogram.""" + +def detrend(data, axis=-1, type='linear', bp=0, overwrite_data=False) -> Array: + """Remove linear trend from data.""" + +def find_peaks(x, height=None, threshold=None, distance=None, prominence=None, + width=None, wlen=None, rel_height=0.5, plateau_size=None) -> tuple[Array, dict]: + """Find peaks in 1D array.""" + +def peak_prominences(x, peaks, wlen=None) -> tuple[Array, Array, Array]: + """Calculate peak prominences.""" + +def peak_widths(x, peaks, rel_height=0.5, prominence_data=None, wlen=None) -> tuple[Array, Array, Array, Array]: + """Calculate peak widths.""" +``` + +### Other Submodules + +```python { .api } +# Fast Fourier Transform (jax.scipy.fft) +import jax.scipy.fft as jfft +# Same interface as jax.numpy.fft with additional functions + +# N-dimensional image processing (jax.scipy.ndimage) +import jax.scipy.ndimage as jnd +# Image filtering, morphology, and measurements + +# Sparse matrix operations (jax.scipy.sparse) +import jax.scipy.sparse as jss +# Sparse matrix formats and operations + +# Interpolation (jax.scipy.interpolate) +import jax.scipy.interpolate as jsi +# 1D and multidimensional interpolation + +# Clustering (jax.scipy.cluster) +import jax.scipy.cluster as jsc +# Hierarchical and k-means clustering + +# Integration and ODE solving (jax.scipy.integrate) +import jax.scipy.integrate as jsi +# Numerical integration and differential equation solving +``` + +## Usage Examples + +```python +import jax.numpy as jnp +import jax.scipy as jsp +import jax.scipy.linalg as jla +import jax.scipy.special as jss +import jax.scipy.stats as jst + +# Linear algebra example +A = jnp.array([[4.0, 2.0], [2.0, 3.0]]) +b = jnp.array([1.0, 2.0]) + +# Solve linear system +x = jla.solve(A, b) + +# Compute eigenvalues and eigenvectors +eigenvals, eigenvecs = jla.eigh(A) + +# Matrix decomposition +L = jla.cholesky(A) # A = L @ L.T + +# Special functions +x = jnp.linspace(-3, 3, 100) +erf_vals = jss.erf(x) +gamma_vals = jss.gamma(x + 1) + +# Statistical distributions +data = jnp.array([1.2, 2.3, 1.8, 3.1, 2.7]) +log_likelihood = jst.norm.logpdf(data, loc=2.0, scale=1.0).sum() + +# Probability density functions +x_vals = jnp.linspace(0, 5, 100) +pdf_vals = jst.gamma.pdf(x_vals, a=2.0, scale=1.0) + +# Use in optimization with JAX transformations +@jax.jit +def neg_log_likelihood(params, data): + mu, sigma = params + return -jst.norm.logpdf(data, mu, sigma).sum() + +# Compute gradient for maximum likelihood estimation 
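+# jax.grad differentiates with respect to the first argument, here the
+# [mu, sigma] parameter list (this snippet assumes `import jax` alongside the
+# imports above).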
+grad_fn = jax.grad(neg_log_likelihood) +gradients = grad_fn([2.0, 1.0], data) +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-jax/docs/tree-operations.md b/.tessl/tiles/tessl/pypi-jax/docs/tree-operations.md new file mode 100644 index 0000000..06efb0f --- /dev/null +++ b/.tessl/tiles/tessl/pypi-jax/docs/tree-operations.md @@ -0,0 +1,400 @@ +# Tree Operations + +JAX provides utilities for working with PyTrees (nested Python data structures containing arrays) through `jax.tree`. PyTrees are fundamental to JAX's functional programming approach and enable elegant handling of complex nested data structures like neural network parameters. + +## Core Imports + +```python +import jax.tree as jtree +from jax.tree import map, flatten, unflatten, reduce +``` + +## What are PyTrees? + +PyTrees are nested Python data structures where: +- **Leaves** are arrays, scalars, or None +- **Nodes** are containers like lists, tuples, dicts, or custom classes +- The tree structure is preserved while operations apply to leaves + +Common PyTree examples: +```python +# Simple trees +tree1 = [1, 2, 3] # List of scalars +tree2 = {'a': jnp.array([1, 2]), 'b': jnp.array([3, 4])} # Dict of arrays + +# Nested trees (neural network parameters) +params = { + 'dense1': {'weight': jnp.zeros((784, 128)), 'bias': jnp.zeros(128)}, + 'dense2': {'weight': jnp.zeros((128, 10)), 'bias': jnp.zeros(10)} +} + +# Mixed structures +state = { + 'params': params, + 'batch_stats': {'mean': jnp.zeros(128), 'var': jnp.ones(128)}, + 'step': 0 # Scalar leaf +} +``` + +## Capabilities + +### Tree Traversal and Transformation + +Apply functions to all leaves while preserving tree structure. + +```python { .api } +def map(f, tree, *rest, is_leaf=None) -> Any: + """ + Apply function to all leaves of one or more trees. + + Args: + f: Function to apply to leaves + tree: Primary PyTree + rest: Additional PyTrees with same structure + is_leaf: Optional function to determine what counts as leaf + + Returns: + PyTree with same structure as input, f applied to all leaves + """ + +def map_with_path(f, tree, *rest, is_leaf=None) -> Any: + """ + Apply function to leaves with path information. + + Args: + f: Function taking (path, *leaves) as arguments + tree: Primary PyTree + rest: Additional PyTrees with same structure + is_leaf: Optional function to determine what counts as leaf + + Returns: + PyTree with f applied to leaves, receiving path info + """ + +def reduce(function, tree, initializer=None, is_leaf=None) -> Any: + """ + Reduce tree to single value by applying function to all leaves. + + Args: + function: Binary function to combine leaves + tree: PyTree to reduce + initializer: Optional initial value for reduction + is_leaf: Optional function to determine what counts as leaf + + Returns: + Single value from reducing all leaves + """ + +def all(tree) -> bool: + """ + Return True if all leaves are truthy. 
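+    Each leaf is evaluated with ``bool()``, so array leaves should normally be
+    reduced to scalars first (for example with ``jnp.all``).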
+ + Args: + tree: PyTree to check + + Returns: + Boolean indicating if all leaves are truthy + """ +``` + +Usage examples: +```python +# Apply function to all arrays in parameter tree +def init_weights(params): + return jtree.map(lambda x: x * 0.01, params) + +# Element-wise operations on multiple trees +def add_trees(tree1, tree2): + return jtree.map(lambda x, y: x + y, tree1, tree2) + +# Compute total number of parameters +def count_params(params): + return jtree.reduce(lambda count, x: count + x.size, params, initializer=0) + +# Check if all gradients are finite +def all_finite(grads): + return jtree.all(jtree.map(jnp.isfinite, grads)) + +# Apply different functions based on path +def scale_by_path(path, param): + if 'bias' in path: + return param * 0.1 # Smaller learning rate for biases + else: + return param * 1.0 + +scaled_grads = jtree.map_with_path(scale_by_path, gradients) +``` + +### Tree Structure Operations + +Flatten trees into lists and reconstruct them, useful for interfacing with optimizers and other libraries. + +```python { .api } +def flatten(tree, is_leaf=None) -> tuple[list, Any]: + """ + Flatten PyTree into list of leaves and tree definition. + + Args: + tree: PyTree to flatten + is_leaf: Optional function to determine what counts as leaf + + Returns: + Tuple of (leaves_list, tree_definition) + """ + +def unflatten(treedef, leaves) -> Any: + """ + Reconstruct PyTree from tree definition and leaves. + + Args: + treedef: Tree definition from flatten() + leaves: List of leaf values + + Returns: + Reconstructed PyTree with original structure + """ + +def flatten_with_path(tree, is_leaf=None) -> tuple[list, list]: + """ + Flatten PyTree with path information for each leaf. + + Args: + tree: PyTree to flatten + is_leaf: Optional function to determine what counts as leaf + + Returns: + Tuple of (path_leaf_pairs, tree_definition) + """ + +def leaves(tree, is_leaf=None) -> list: + """ + Get list of all leaves in PyTree. + + Args: + tree: PyTree to extract leaves from + is_leaf: Optional function to determine what counts as leaf + + Returns: + List containing all leaf values + """ + +def leaves_with_path(tree, is_leaf=None) -> list: + """ + Get list of (path, leaf) pairs. + + Args: + tree: PyTree to extract leaves from + is_leaf: Optional function to determine what counts as leaf + + Returns: + List of (path, leaf) tuples + """ + +def structure(tree, is_leaf=None) -> Any: + """ + Get tree structure (definition) without leaf values. + + Args: + tree: PyTree to get structure from + is_leaf: Optional function to determine what counts as leaf + + Returns: + Tree definition describing structure + """ +``` + +Usage examples: +```python +# Flatten for use with scipy optimizers +params = {'w': jnp.array([1, 2]), 'b': jnp.array([3])} +flat_params, tree_def = jtree.flatten(params) +print(flat_params) # [Array([1, 2]), Array([3])] + +# Reconstruct after optimization +new_flat_params = [jnp.array([4, 5]), jnp.array([6])] +new_params = jtree.unflatten(tree_def, new_flat_params) +print(new_params) # {'w': Array([4, 5]), 'b': Array([6])} + +# Get all parameter arrays +all_arrays = jtree.leaves(params) + +# Inspect structure with paths +path_leaf_pairs = jtree.leaves_with_path(params) +print(path_leaf_pairs) # [(('w',), Array([1, 2])), (('b',), Array([3]))] + +# Get structure for later use +structure_only = jtree.structure(params) +``` + +### Tree Transformation and Manipulation + +Advanced operations for tree manipulation and structural transformations. 
+ +```python { .api } +def transpose(outer_treedef, inner_treedef, pytree_to_transpose) -> Any: + """ + Transpose nested PyTree structure. + + Args: + outer_treedef: Target outer tree structure + inner_treedef: Target inner tree structure + pytree_to_transpose: PyTree to transpose + + Returns: + PyTree with transposed nested structure + """ +``` + +Usage example: +```python +# Transpose structure: list of dicts -> dict of lists +list_of_dicts = [ + {'a': 1, 'b': 2}, + {'a': 3, 'b': 4}, + {'a': 5, 'b': 6} +] + +# Get structure definitions +outer_structure = jtree.structure(list_of_dicts) # List structure +inner_structure = jtree.structure({'a': None, 'b': None}) # Dict structure + +# Transpose to dict of lists +dict_of_lists = jtree.transpose(inner_structure, outer_structure, list_of_dicts) +print(dict_of_lists) # {'a': [1, 3, 5], 'b': [2, 4, 6]} +``` + +### Broadcasting and Advanced Operations + +```python { .api } +def broadcast(f, tree, *rest) -> Any: + """ + Broadcast function application across PyTree structures. + + Args: + f: Function to broadcast + tree: Primary PyTree + rest: Additional PyTrees (may have different but compatible structures) + + Returns: + PyTree result of broadcasting f across inputs + """ +``` + +## Custom PyTree Types + +Register custom classes as PyTree nodes: + +```python +import jax + +# Register custom class as PyTree node +class MyContainer: + def __init__(self, data): + self.data = data + + def __repr__(self): + return f"MyContainer({self.data})" + +def container_flatten(container): + # Return (children, aux_data) where children are PyTrees + return (container.data.values(), tuple(container.data.keys())) + +def container_unflatten(aux_data, children): + # Reconstruct from aux_data and children + return MyContainer(dict(zip(aux_data, children))) + +# Register the PyTree node +jax.tree_util.register_pytree_node( + MyContainer, + container_flatten, + container_unflatten +) + +# Now MyContainer works with tree operations +container = MyContainer({'x': jnp.array([1, 2]), 'y': jnp.array([3, 4])}) +doubled = jtree.map(lambda x: x * 2, container) +print(doubled) # MyContainer({'x': Array([2, 4]), 'y': Array([6, 8])}) +``` + +## Common Usage Patterns + +### Neural Network Parameter Management + +```python +# Initialize network parameters as PyTree +def init_mlp_params(layer_sizes, key): + params = {} + keys = jax.random.split(key, len(layer_sizes) - 1) + + for i, (in_size, out_size) in enumerate(zip(layer_sizes[:-1], layer_sizes[1:])): + w_key, b_key = jax.random.split(keys[i]) + params[f'layer_{i}'] = { + 'weights': jax.random.normal(w_key, (in_size, out_size)) * 0.01, + 'biases': jnp.zeros(out_size) + } + return params + +# Apply gradients using tree operations +def update_params(params, grads, learning_rate): + return jtree.map(lambda p, g: p - learning_rate * g, params, grads) + +# Compute parameter statistics +def param_stats(params): + flat_params = jtree.leaves(params) + total_params = sum(p.size for p in flat_params) + param_norm = jnp.sqrt(sum(jnp.sum(p**2) for p in flat_params)) + return {'total_params': total_params, 'norm': param_norm} +``` + +### Optimizer State Management + +```python +# Adam optimizer state as PyTree +def init_adam_state(params): + return { + 'm': jtree.map(jnp.zeros_like, params), # First moment + 'v': jtree.map(jnp.zeros_like, params), # Second moment + 'step': 0 + } + +def adam_update(params, grads, state, learning_rate=1e-3, beta1=0.9, beta2=0.999, eps=1e-8): + step = state['step'] + 1 + + # Update biased moments + m = 
jtree.map(lambda m_prev, g: beta1 * m_prev + (1 - beta1) * g, state['m'], grads) + v = jtree.map(lambda v_prev, g: beta2 * v_prev + (1 - beta2) * g**2, state['v'], grads) + + # Bias correction + m_hat = jtree.map(lambda m_val: m_val / (1 - beta1**step), m) + v_hat = jtree.map(lambda v_val: v_val / (1 - beta2**step), v) + + # Parameter update + new_params = jtree.map( + lambda p, m_val, v_val: p - learning_rate * m_val / (jnp.sqrt(v_val) + eps), + params, m_hat, v_hat + ) + + new_state = {'m': m, 'v': v, 'step': step} + return new_params, new_state +``` + +### Batch Processing + +```python +# Process batch of PyTrees +def process_batch(batch_trees): + # batch_trees is a list of PyTrees + # Convert to PyTree of batched arrays + return jtree.map(lambda *arrays: jnp.stack(arrays), *batch_trees) + +# Example: batch of neural network inputs +batch_inputs = [ + {'image': jnp.ones((28, 28)), 'label': 5}, + {'image': jnp.zeros((28, 28)), 'label': 3}, + {'image': jnp.ones((28, 28)) * 0.5, 'label': 1} +] + +batched = process_batch(batch_inputs) +print(batched['image'].shape) # (3, 28, 28) +print(batched['label'].shape) # (3,) +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-jax/tile.json b/.tessl/tiles/tessl/pypi-jax/tile.json new file mode 100644 index 0000000..1d58bbe --- /dev/null +++ b/.tessl/tiles/tessl/pypi-jax/tile.json @@ -0,0 +1,7 @@ +{ + "name": "tessl/pypi-jax", + "version": "0.7.0", + "docs": "docs/index.md", + "describes": "pkg:pypi/jax@0.7.1", + "summary": "Differentiate, compile, and transform Numpy code." +} \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-pandas/docs/api-types.md b/.tessl/tiles/tessl/pypi-pandas/docs/api-types.md new file mode 100644 index 0000000..86e8051 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-pandas/docs/api-types.md @@ -0,0 +1,321 @@ +# pandas.api.types - Type Checking and Data Validation + +The `pandas.api.types` module provides comprehensive type checking functionality for pandas data structures and NumPy arrays. This module is essential for data validation, type inference, and conditional operations based on data types. 
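+
+A quick, minimal sketch of the typical pattern (the DataFrame and column names here are made up for illustration): branch per-column processing on the detected dtype.
+
+```python
+import pandas as pd
+from pandas.api.types import is_numeric_dtype, is_string_dtype
+
+df = pd.DataFrame({"price": [9.99, 14.5, None], "sku": [" a1", "b2 ", "c3"]})
+
+for col in df.columns:
+    if is_numeric_dtype(df[col]):
+        df[col] = df[col].fillna(df[col].median())    # impute numeric columns
+    elif is_string_dtype(df[col]):
+        df[col] = df[col].str.strip().str.upper()     # normalize text columns
+```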
+ +## Core Imports + +```python +# Type checking functions +from pandas.api.types import ( + # Data type checking + is_bool_dtype, is_integer_dtype, is_float_dtype, is_numeric_dtype, + is_object_dtype, is_string_dtype, is_complex_dtype, + + # Temporal type checking + is_datetime64_dtype, is_datetime64_any_dtype, is_datetime64_ns_dtype, + is_timedelta64_dtype, is_timedelta64_ns_dtype, + + # Extension type checking + is_categorical_dtype, is_period_dtype, is_interval_dtype, + is_extension_array_dtype, is_signed_integer_dtype, is_unsigned_integer_dtype, + + # Value type checking + is_bool, is_integer, is_float, is_complex, is_number, is_scalar, + + # Structure checking + is_array_like, is_list_like, is_dict_like, is_file_like, is_hashable, + is_iterator, is_named_tuple, is_re, is_re_compilable, + + # Type inference and utilities + infer_dtype, pandas_dtype, is_dtype_equal, + + # Categorical operations + union_categoricals, + + # Extension dtypes + CategoricalDtype, DatetimeTZDtype, IntervalDtype, PeriodDtype +) +``` + +## Data Type Checking + +### Numeric Type Checking + +```python +# Basic numeric type detection +is_bool_dtype(arr_or_dtype) -> bool { .api } +is_integer_dtype(arr_or_dtype) -> bool { .api } +is_float_dtype(arr_or_dtype) -> bool { .api } +is_numeric_dtype(arr_or_dtype) -> bool { .api } +is_complex_dtype(arr_or_dtype) -> bool { .api } + +# Specific numeric type checking +is_signed_integer_dtype(arr_or_dtype) -> bool { .api } +is_unsigned_integer_dtype(arr_or_dtype) -> bool { .api } +is_any_real_numeric_dtype(arr_or_dtype) -> bool { .api } + +# Deprecated (pandas 2.1.0+) +is_int64_dtype(arr_or_dtype) -> bool { .api } # Use dtype == np.int64 instead +``` + +### Temporal Type Checking + +```python +# DateTime type detection +is_datetime64_dtype(arr_or_dtype) -> bool { .api } +is_datetime64_any_dtype(arr_or_dtype) -> bool { .api } +is_datetime64_ns_dtype(arr_or_dtype) -> bool { .api } + +# TimeDelta type detection +is_timedelta64_dtype(arr_or_dtype) -> bool { .api } +is_timedelta64_ns_dtype(arr_or_dtype) -> bool { .api } + +# Deprecated timezone-aware datetime checking (pandas 2.1.0+) +is_datetime64tz_dtype(arr_or_dtype) -> bool { .api } # Use isinstance(dtype, pd.DatetimeTZDtype) instead +``` + +### Extension Type Checking + +```python +# Pandas extension types +is_categorical_dtype(arr_or_dtype) -> bool { .api } # Deprecated: Use isinstance(dtype, pd.CategoricalDtype) +is_period_dtype(arr_or_dtype) -> bool { .api } # Deprecated: Use isinstance(dtype, pd.PeriodDtype) +is_interval_dtype(arr_or_dtype) -> bool { .api } # Deprecated: Use isinstance(dtype, pd.IntervalDtype) +is_extension_array_dtype(arr_or_dtype) -> bool { .api } + +# String and object types +is_object_dtype(arr_or_dtype) -> bool { .api } +is_string_dtype(arr_or_dtype) -> bool { .api } + +# Sparse arrays (deprecated pandas 2.1.0+) +is_sparse(arr) -> bool { .api } # Use isinstance(dtype, pd.SparseDtype) instead +``` + +## Value Type Checking + +### Scalar Type Detection + +```python +# Basic scalar type checking +is_bool(obj) -> bool { .api } +is_integer(obj) -> bool { .api } +is_float(obj) -> bool { .api } +is_complex(obj) -> bool { .api } +is_number(obj) -> bool { .api } +is_scalar(obj) -> bool { .api } +``` + +### Collection Type Detection + +```python +# Container type checking +is_array_like(obj) -> bool { .api } +is_list_like(obj) -> bool { .api } +is_dict_like(obj) -> bool { .api } +is_iterator(obj) -> bool { .api } + +# Specific structure checking +is_named_tuple(obj) -> bool { .api } +is_hashable(obj) -> bool { 
.api } +is_file_like(obj) -> bool { .api } + +# Regular expression checking +is_re(obj) -> bool { .api } +is_re_compilable(obj) -> bool { .api } +``` + +## Type Inference and Utilities + +### Data Type Inference + +```python +# Infer the type of scalar or array-like data +infer_dtype(value, skipna: bool = True) -> str { .api } +""" +Returns string labels for detected types: +- 'string', 'bytes', 'floating', 'integer', 'mixed-integer', 'mixed-integer-float' +- 'decimal', 'complex', 'categorical', 'boolean' +- 'datetime64', 'datetime', 'date', 'timedelta64', 'timedelta', 'time', 'period' +- 'mixed', 'unknown-array' +""" + +# Convert input to pandas/numpy dtype +pandas_dtype(dtype) -> DtypeObj { .api } + +# Compare two dtypes for equality +is_dtype_equal(source, target) -> bool { .api } +``` + +## Categorical Operations + +### Categorical Data Manipulation + +```python +# Combine multiple categorical arrays +union_categoricals( + to_union, + sort_categories: bool = False, + ignore_order: bool = False +) -> Categorical { .api } +""" +Combine list-like of Categorical-like objects, unioning categories. +All categories must have the same dtype. +""" +``` + +## Extension Dtype Classes + +### Core Extension Dtypes + +```python +# Categorical data type +class CategoricalDtype(categories=None, ordered=None) { .api } +""" +Type for categorical data with categories and orderedness. +Parameters: +- categories: Index-like, optional +- ordered: bool, default False +""" + +# Timezone-aware datetime type +class DatetimeTZDtype(unit='ns', tz=None) { .api } +""" +ExtensionDtype for timezone-aware datetime data. +Parameters: +- unit: str, default 'ns' +- tz: str, tzinfo, optional +""" + +# Interval data type +class IntervalDtype(subtype=None, closed='right') { .api } +""" +ExtensionDtype for Interval data. +Parameters: +- subtype: numpy dtype, optional +- closed: {'left', 'right', 'both', 'neither'}, default 'right' +""" + +# Period data type +class PeriodDtype(freq=None) { .api } +""" +ExtensionDtype for Period data. +Parameters: +- freq: str or DateOffset, optional +""" +``` + +## Type Definitions + +```python +from typing import Union, Any +from numpy import dtype as np_dtype +from pandas.core.dtypes.base import ExtensionDtype + +# Core type aliases +ArrayLike = Union[np.ndarray, 'Series', 'Index', 'ExtensionArray'] +DtypeObj = Union[np_dtype, ExtensionDtype] +Dtype = Union[str, np_dtype, ExtensionDtype, type] + +# Function signatures for key validation functions +def is_numeric_dtype(arr_or_dtype: ArrayLike | DtypeObj) -> bool: ... +def is_datetime64_any_dtype(arr_or_dtype: ArrayLike | DtypeObj) -> bool: ... +def is_categorical_dtype(arr_or_dtype: ArrayLike | DtypeObj) -> bool: ... # Deprecated +def infer_dtype(value: Any, skipna: bool = True) -> str: ... 
+``` + +## Usage Examples + +### Basic Type Checking + +```python +import pandas as pd +import numpy as np +from pandas.api.types import is_numeric_dtype, is_datetime64_any_dtype + +# Check series dtypes +numeric_series = pd.Series([1, 2, 3]) +string_series = pd.Series(['a', 'b', 'c']) +datetime_series = pd.Series(pd.date_range('2023-01-01', periods=3)) + +assert is_numeric_dtype(numeric_series) +assert not is_numeric_dtype(string_series) +assert is_datetime64_any_dtype(datetime_series) +``` + +### Type Inference + +```python +from pandas.api.types import infer_dtype + +# Infer types from mixed data +mixed_data = [1, 2.5, 3] +print(infer_dtype(mixed_data)) # 'mixed-integer-float' + +string_data = ['a', 'b', 'c'] +print(infer_dtype(string_data)) # 'string' + +datetime_data = pd.date_range('2023-01-01', periods=3) +print(infer_dtype(datetime_data)) # 'datetime64' +``` + +### Extension Dtype Usage + +```python +from pandas.api.types import CategoricalDtype, union_categoricals + +# Create categorical dtype +cat_dtype = CategoricalDtype(['low', 'medium', 'high'], ordered=True) +cat_series = pd.Series(['low', 'high', 'medium'], dtype=cat_dtype) + +# Combine categoricals +cat1 = pd.Categorical(['a', 'b']) +cat2 = pd.Categorical(['b', 'c']) +combined = union_categoricals([cat1, cat2]) +``` + +### Data Validation Pipeline + +```python +from pandas.api.types import ( + is_numeric_dtype, is_string_dtype, is_datetime64_any_dtype, + is_categorical_dtype +) + +def validate_dataframe_dtypes(df: pd.DataFrame) -> dict: + """Validate and report column dtypes.""" + report = {} + + for col in df.columns: + if is_numeric_dtype(df[col]): + report[col] = 'numeric' + elif is_string_dtype(df[col]): + report[col] = 'string' + elif is_datetime64_any_dtype(df[col]): + report[col] = 'datetime' + elif isinstance(df[col].dtype, pd.CategoricalDtype): # Modern approach + report[col] = 'categorical' + else: + report[col] = 'other' + + return report +``` + +## Migration Notes + +Several functions in pandas.api.types have been deprecated in favor of more explicit type checking: + +- `is_categorical_dtype()` → `isinstance(dtype, pd.CategoricalDtype)` +- `is_period_dtype()` → `isinstance(dtype, pd.PeriodDtype)` +- `is_interval_dtype()` → `isinstance(dtype, pd.IntervalDtype)` +- `is_datetime64tz_dtype()` → `isinstance(dtype, pd.DatetimeTZDtype)` +- `is_sparse()` → `isinstance(dtype, pd.SparseDtype)` +- `is_int64_dtype()` → `dtype == np.int64` + +The modern approach provides better type safety and clearer intent. + +## See Also + +- [pandas.DataFrame.dtypes](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.dtypes.html) - Access column dtypes +- [pandas.Series.dtype](https://pandas.pydata.org/docs/reference/api/pandas.Series.dtype.html) - Access series dtype +- [pandas.core.dtypes](https://pandas.pydata.org/docs/reference/api/pandas.core.dtypes.html) - Core dtype functionality +- [numpy.dtype](https://numpy.org/doc/stable/reference/arrays.dtypes.html) - NumPy dtype reference \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-pandas/docs/configuration.md b/.tessl/tiles/tessl/pypi-pandas/docs/configuration.md new file mode 100644 index 0000000..e92ab1a --- /dev/null +++ b/.tessl/tiles/tessl/pypi-pandas/docs/configuration.md @@ -0,0 +1,355 @@ +# Configuration and Options + +Pandas configuration system for controlling display options, computational behavior, and library-wide settings. 
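+
+A minimal sketch of the everyday workflow (the option names are real pandas options; the values are arbitrary examples):
+
+```python
+import pandas as pd
+
+pd.set_option("display.max_rows", 50)        # persist a setting for this session
+print(pd.get_option("display.max_rows"))     # 50
+
+with pd.option_context("display.precision", 2):
+    ...                                      # applies only inside this block
+
+pd.reset_option("display.max_rows")          # restore the default
+```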
+ +## Core Imports + +```python +import pandas as pd +from pandas import get_option, set_option, reset_option, option_context +``` + +## Capabilities + +### Option Management + +Core functions for getting, setting, and managing pandas configuration options. + +```python { .api } +def get_option(pat): + """ + Retrieve the value of the specified option. + + Parameters: + - pat: str, regexp pattern for option name + + Returns: + object, option value + """ + +def set_option(pat, value): + """ + Set the value of the specified option. + + Parameters: + - pat: str, regexp pattern for option name + - value: object, new value for option + """ + +def reset_option(pat): + """ + Reset one or more options to their default value. + + Parameters: + - pat: str, regexp pattern for option name(s) to reset + """ + +def describe_option(pat, _print_desc=False): + """ + Print available options and their descriptions. + + Parameters: + - pat: str, regexp pattern for option name(s) to describe + - _print_desc: bool, whether to print descriptions + + Returns: + str or None, option descriptions + """ + +def option_context(*args): + """ + Context manager to temporarily set options. + + Parameters: + - args: pairs of (option_name, option_value) + + Usage: + with pd.option_context('display.max_rows', 100): + # Temporary setting active here + pass + # Setting reverted after context + """ + +# Access options registry directly +options: object # Configuration options registry object +``` + +### Display Options + +Configuration options controlling how pandas objects are displayed. + +```python { .api } +# Display formatting options (accessed via get_option/set_option): + +# General display +'display.chop_threshold' # float, threshold for displaying small numbers as zero +'display.colheader_justify' # str, column header alignment ('left', 'right') +'display.date_dayfirst' # bool, interpret ambiguous dates as dayfirst +'display.date_yearfirst' # bool, interpret ambiguous dates as yearfirst +'display.encoding' # str, character encoding to use for display +'display.expand_frame_repr' # bool, allow DataFrame repr to span multiple lines +'display.float_format' # callable, function to format floats +'display.large_repr' # str, display format for large frames ('truncate' or 'info') +'display.latex.escape' # bool, escape special characters in LaTeX output +'display.latex.longtable' # bool, use longtable format for LaTeX output +'display.latex.multicolumn' # bool, use multicolumn format for LaTeX output +'display.latex.multicolumn_format' # str, alignment for multicolumn LaTeX format +'display.latex.multirow' # bool, use multirow format for LaTeX output +'display.latex.repr' # bool, use LaTeX formatting for repr +'display.max_categories' # int, maximum categories to display for categorical columns +'display.max_columns' # int, maximum columns to display +'display.max_colwidth' # int, maximum column width in characters +'display.max_info_columns' # int, maximum columns for DataFrame.info() +'display.max_info_rows' # int, maximum rows for DataFrame.info() +'display.max_rows' # int, maximum rows to display +'display.max_seq_items' # int, maximum sequence items to display +'display.memory_usage' # str, memory usage display ('deep' or None) +'display.min_rows' # int, minimum rows to display when truncating +'display.multi_sparse' # bool, sparsify multi-index display +'display.notebook_repr_html' # bool, use HTML repr in Jupyter notebooks +'display.pprint_nest_depth' # int, nesting depth for pretty printing +'display.precision' # int, 
floating point precision for display +'display.show_dimensions' # bool, display DataFrame dimensions +'display.unicode.ambiguous_as_wide' # bool, use ambiguous Unicode characters +'display.unicode.east_asian_width' # bool, use East Asian character widths +'display.width' # int, display width in characters + +# HTML specific options +'display.html.border' # int, border attribute for HTML table tags +'display.html.table_schema' # bool, use table schema extension for HTML +'display.html.use_mathjax' # bool, use MathJax for mathematical notation + +# Styling options +'styler.format.decimal' # str, decimal separator for Styler formatting +'styler.format.escape' # str, escape method for Styler ('html', 'latex', None) +'styler.format.formatter' # dict, default formatters for Styler +'styler.format.na_rep' # str, representation for missing values in Styler +'styler.format.precision' # int, precision for Styler numeric formatting +'styler.format.thousands' # str, thousands separator for Styler +'styler.html.mathjax' # bool, use MathJax for mathematical notation in Styler +'styler.latex.environment' # str, LaTeX environment for Styler ('longtable', 'tabular') +'styler.latex.hrules' # bool, add horizontal rules in LaTeX Styler +'styler.latex.multicol_align' # str, multicolumn alignment for LaTeX Styler +'styler.latex.multirow_align' # str, multirow alignment for LaTeX Styler +'styler.render.encoding' # str, encoding for Styler rendering +'styler.render.max_columns' # int, maximum columns for Styler rendering +'styler.render.max_elements' # int, maximum elements for Styler rendering +'styler.render.max_rows' # int, maximum rows for Styler rendering +'styler.render.repr' # str, representation method for Styler ('html', 'latex') +'styler.sparse.columns' # bool, sparsify column MultiIndex in Styler +'styler.sparse.index' # bool, sparsify row MultiIndex in Styler +``` + +### Computational Options + +Configuration options affecting pandas computational behavior. 
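+
+These are read and written with the same `get_option`/`set_option` machinery as the display options; for instance (a small sketch using two of the options listed below):
+
+```python
+import pandas as pd
+
+pd.set_option("mode.copy_on_write", True)        # opt in to copy-on-write semantics
+pd.set_option("compute.use_bottleneck", False)   # disable the bottleneck backend
+```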
+ +```python { .api } +# Computational behavior options: + +'compute.use_bottleneck' # bool, use bottleneck library for numerical operations +'compute.use_numexpr' # bool, use numexpr library for expression evaluation +'compute.use_numba' # bool, use numba library for acceleration + +# Mode options +'mode.chained_assignment' # str, behavior for chained assignment ('raise', 'warn', None) +'mode.copy_on_write' # bool, enable copy-on-write mode +'mode.data_manager' # str, internal data manager to use (deprecated) +'mode.sim_interactive' # bool, simulate interactive mode for testing +'mode.string_storage' # str, default storage for StringDtype ('python', 'pyarrow') +'mode.use_inf_as_na' # bool, treat inf/-inf as NA in computations + +# Future warnings +'future.infer_string' # bool, enable string inference in future pandas version +'future.no_silent_downcasting' # bool, disable silent downcasting in future + +# I/O options +'io.common.default_buffer_size' # int, default buffer size for I/O operations +'io.excel.ods.reader' # str, default engine for reading ODS files +'io.excel.ods.writer' # str, default engine for writing ODS files +'io.excel.xls.reader' # str, default engine for reading XLS files +'io.excel.xls.writer' # str, default engine for writing XLS files +'io.excel.xlsb.reader' # str, default engine for reading XLSB files +'io.excel.xlsm.reader' # str, default engine for reading XLSM files +'io.excel.xlsm.writer' # str, default engine for writing XLSM files +'io.excel.xlsx.reader' # str, default engine for reading XLSX files +'io.excel.xlsx.writer' # str, default engine for writing XLSX files +'io.hdf.default_format' # str, default HDF5 format ('table' or 'fixed') +'io.hdf.dropna_table' # bool, drop NaN values when writing HDF5 table format +'io.parquet.engine' # str, default engine for parquet ('auto', 'pyarrow', 'fastparquet') +'io.sql.engine' # str, default engine for SQL operations ('auto') + +# Plotting options +'plotting.backend' # str, plotting backend to use ('matplotlib') +'plotting.matplotlib.register_converters' # bool, register pandas converters with matplotlib +``` + +### Advanced Configuration + +Functions for advanced configuration management and option introspection. + +```python { .api } +def show_versions(as_json=False): + """ + Provide useful information about pandas and the system. + + Parameters: + - as_json: bool, return information as JSON string + + Returns: + None or str, version information + """ + +# Configuration state functions +def using_copy_on_write(): + """ + Return whether copy-on-write is enabled. + + Returns: + bool, True if copy-on-write mode is enabled + """ + +def using_nullable_dtypes(): + """ + Return whether nullable dtypes are the default. + + Returns: + bool, True if nullable dtypes are default + """ + +def using_string_dtype(): + """ + Return whether string dtype inference is enabled. + + Returns: + bool, True if string dtype inference is enabled + """ + +# Custom formatting functions +def set_eng_float_format(accuracy=3, use_eng_prefix=False): + """ + Format float representation to engineering format. + + Parameters: + - accuracy: int, number of decimal places + - use_eng_prefix: bool, use engineering prefix (k, M, G, etc.) 
+ """ +``` + +### Option Categories + +```python { .api } +# Major option categories and their purposes: + +class OptionCategories: + """Documentation of pandas option categories.""" + + DISPLAY = "display.*" # Control visual display of pandas objects + COMPUTE = "compute.*" # Control computational backends and optimizations + MODE = "mode.*" # Control pandas operational behavior modes + IO = "io.*" # Control input/output operations and engines + PLOTTING = "plotting.*" # Control plotting behavior and backends + STYLER = "styler.*" # Control DataFrame.style formatting options + FUTURE = "future.*" # Control future behavior and deprecation warnings +``` + +### Context Managers and Temporary Settings + +```python { .api } +# Common usage patterns for temporary option changes: + +# Single option change +def temp_option_example(): + """Example of temporary option change.""" + with pd.option_context('display.max_rows', 100): + # Temporarily show more rows + print(large_dataframe) + # Option automatically reverted + +# Multiple option changes +def multi_option_example(): + """Example of multiple temporary option changes.""" + with pd.option_context('display.max_rows', 50, + 'display.max_columns', 10, + 'display.precision', 2): + # Multiple temporary settings active + print(dataframe) + # All options automatically reverted + +# Nested contexts +def nested_context_example(): + """Example of nested option contexts.""" + with pd.option_context('display.max_rows', 100): + print("Outer context - 100 rows") + with pd.option_context('display.max_rows', 10): + print("Inner context - 10 rows") + print("Back to outer context - 100 rows") + print("Original setting restored") +``` + +### Commonly Used Options + +```python { .api } +# Most frequently modified options with common values: + +COMMON_OPTIONS = { + # Display options + 'display.max_rows': [10, 20, 50, 100, None], # None = show all + 'display.max_columns': [10, 20, None], # None = show all + 'display.width': [80, 120, 160, None], # None = auto-detect + 'display.precision': [2, 3, 4, 6], # decimal places + 'display.float_format': ['{:.2f}'.format, '{:.4f}'.format, None], + 'display.expand_frame_repr': [True, False], # multi-line repr + 'display.show_dimensions': [True, False], # show (rows, cols) + + # Computational options + 'mode.chained_assignment': ['warn', 'raise', None], + 'mode.copy_on_write': [True, False], + 'compute.use_bottleneck': [True, False], + 'compute.use_numexpr': [True, False], + + # I/O options + 'io.excel.xlsx.reader': ['openpyxl', 'xlrd'], + 'io.excel.xlsx.writer': ['openpyxl', 'xlsxwriter'], + 'io.parquet.engine': ['auto', 'pyarrow', 'fastparquet'], +} +``` + +## Types + +```python { .api } +# Option value types +OptionValue = Union[str, int, float, bool, None, callable] + +# Display alignment options +DisplayAlign = Literal['left', 'right', 'center'] + +# Large representation modes +LargeRepr = Literal['truncate', 'info'] + +# Chained assignment modes +ChainedAssignment = Literal['raise', 'warn', None] + +# String storage modes +StringStorage = Literal['python', 'pyarrow'] + +# I/O engine options +ExcelEngine = Literal['openpyxl', 'xlrd', 'xlsxwriter', 'odf', 'pyxlsb'] +ParquetEngine = Literal['auto', 'pyarrow', 'fastparquet'] +HDFFormat = Literal['table', 'fixed'] + +# Plotting backends +PlottingBackend = Literal['matplotlib'] + +# Memory usage options +MemoryUsage = Literal['deep', None] + +# LaTeX environment options +LatexEnvironment = Literal['longtable', 'tabular'] + +# Styler representation options +StylerRepr = 
Literal['html', 'latex'] + +# Float format function type +FloatFormatter = Callable[[float], str] +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-pandas/docs/core-data-structures.md b/.tessl/tiles/tessl/pypi-pandas/docs/core-data-structures.md new file mode 100644 index 0000000..d32a037 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-pandas/docs/core-data-structures.md @@ -0,0 +1,383 @@ +# Core Data Structures + +The fundamental data structures that form the foundation of pandas: DataFrame, Series, and various Index types. These structures provide the building blocks for all data manipulation operations. + +## Core Imports + +```python +import pandas as pd +from pandas import DataFrame, Series, Index +``` + +## Capabilities + +### DataFrame + +Two-dimensional labeled data structure with heterogeneous columns, similar to a spreadsheet or SQL table. The primary pandas data structure for most use cases. + +```python { .api } +class DataFrame: + def __init__(self, data=None, index=None, columns=None, dtype=None, copy=None): + """ + Two-dimensional, size-mutable, potentially heterogeneous tabular data. + + Parameters: + - data: dict, list, ndarray, Series, or DataFrame + - index: Index or array-like, row labels + - columns: Index or array-like, column labels + - dtype: data type to force + - copy: bool, copy data from inputs + """ + + def head(self, n=5): + """Return the first n rows.""" + + def tail(self, n=5): + """Return the last n rows.""" + + def info(self, verbose=None, buf=None, max_cols=None, memory_usage=None, show_counts=None, null_counts=None): + """Print concise summary of DataFrame.""" + + def describe(self, percentiles=None, include=None, exclude=None): + """Generate descriptive statistics.""" + + def shape(self): + """Return tuple of (rows, columns).""" + + def size(self): + """Return number of elements.""" + + def columns(self): + """Column labels.""" + + def index(self): + """Row labels.""" + + def dtypes(self): + """Data types of columns.""" + + def values(self): + """NumPy representation of DataFrame.""" + + def empty(self): + """True if DataFrame is empty.""" + + def copy(self, deep=True): + """Make a copy of DataFrame.""" + + def select_dtypes(self, include=None, exclude=None): + """Select columns based on data types.""" + + def astype(self, dtype, copy=True, errors='raise'): + """Cast DataFrame to specified dtype.""" + + def sort_values(self, by, axis=0, ascending=True, inplace=False, kind='quicksort', na_position='last', ignore_index=False, key=None): + """Sort by values along axis.""" + + def sort_index(self, axis=0, level=None, ascending=True, inplace=False, kind='quicksort', na_position='last', sort_remaining=True, ignore_index=False, key=None): + """Sort by labels along axis.""" + + def drop(self, labels=None, axis=0, index=None, columns=None, level=None, inplace=False, errors='raise'): + """Drop specified labels from rows or columns.""" + + def drop_duplicates(self, subset=None, keep='first', inplace=False, ignore_index=False): + """Remove duplicate rows.""" + + def dropna(self, axis=0, how='any', thresh=None, subset=None, inplace=False): + """Remove missing values.""" + + def fillna(self, value=None, method=None, axis=None, inplace=False, limit=None, downcast=None): + """Fill missing values.""" + + def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, group_keys=True, squeeze=False, observed=False, dropna=True): + """Group DataFrame by one or more columns.""" + + def apply(self, func, axis=0, raw=False, result_type=None, args=(), 
**kwargs): + """Apply function along axis.""" + + def applymap(self, func, na_action=None, **kwargs): + """Apply function element-wise.""" + + def aggregate(self, func, axis=0, *args, **kwargs): + """Aggregate using one or more operations.""" + + def transform(self, func, axis=0, *args, **kwargs): + """Transform using one or more operations.""" + + def set_index(self, keys, drop=True, append=False, inplace=False, verify_integrity=False): + """Set DataFrame index using existing columns.""" + + def reset_index(self, level=None, drop=False, inplace=False, col_level=0, col_fill=''): + """Reset index to default integer index.""" + + def reindex(self, labels=None, index=None, columns=None, axis=None, method=None, copy=True, level=None, fill_value=None, limit=None, tolerance=None): + """Conform DataFrame to new index.""" + + def to_csv(self, path_or_buf=None, sep=',', na_rep='', float_format=None, columns=None, header=True, index=True, index_label=None, mode='w', encoding=None, compression='infer', quoting=None, quotechar='"', line_terminator=None, chunksize=None, date_format=None, doublequote=True, escapechar=None, decimal='.', errors='strict', storage_options=None): + """Write DataFrame to CSV file.""" + + def to_excel(self, excel_writer, sheet_name='Sheet1', na_rep='', float_format=None, columns=None, header=True, index=True, index_label=None, startrow=0, startcol=0, engine=None, merge_cells=True, encoding=None, inf_rep='inf', verbose=None, freeze_panes=None, storage_options=None): + """Write DataFrame to Excel file.""" + + def to_json(self, path_or_buf=None, orient=None, date_format=None, double_precision=10, force_ascii=True, date_unit='ms', default_handler=None, lines=False, compression='infer', index=True, indent=None, storage_options=None): + """Write DataFrame to JSON.""" + + def to_dict(self, orient='dict', into=dict): + """Convert DataFrame to dictionary.""" + + def to_numpy(self, dtype=None, copy=False, na_value=None): + """Convert DataFrame to NumPy array.""" +``` + +### Series + +One-dimensional labeled array capable of holding any data type. The basic building block of pandas data structures. + +```python { .api } +class Series: + def __init__(self, data=None, index=None, dtype=None, name=None, copy=None, fastpath=False): + """ + One-dimensional ndarray with axis labels. 
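+        Arithmetic between Series aligns automatically on the index labels.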
+ + Parameters: + - data: array-like, dict, or scalar value + - index: array-like or Index, labels for the data + - dtype: data type for the series + - name: name for the Series + - copy: bool, copy input data + """ + + def head(self, n=5): + """Return the first n values.""" + + def tail(self, n=5): + """Return the last n values.""" + + def describe(self, percentiles=None, include=None, exclude=None): + """Generate descriptive statistics.""" + + def shape(self): + """Return tuple of shape.""" + + def size(self): + """Return number of elements.""" + + def index(self): + """Series index (labels).""" + + def values(self): + """NumPy representation of Series.""" + + def dtype(self): + """Data type of Series.""" + + def name(self): + """Name of Series.""" + + def empty(self): + """True if Series is empty.""" + + def copy(self, deep=True): + """Make a copy of Series.""" + + def astype(self, dtype, copy=True, errors='raise'): + """Cast Series to specified dtype.""" + + def sort_values(self, axis=0, ascending=True, inplace=False, kind='quicksort', na_position='last', ignore_index=False, key=None): + """Sort by values.""" + + def sort_index(self, axis=0, level=None, ascending=True, inplace=False, kind='quicksort', na_position='last', sort_remaining=True, ignore_index=False, key=None): + """Sort by index labels.""" + + def drop(self, labels=None, axis=0, index=None, columns=None, level=None, inplace=False, errors='raise'): + """Drop specified labels.""" + + def drop_duplicates(self, keep='first', inplace=False): + """Remove duplicate values.""" + + def dropna(self, axis=0, inplace=False, how=None): + """Remove missing values.""" + + def fillna(self, value=None, method=None, axis=None, inplace=False, limit=None, downcast=None): + """Fill missing values.""" + + def apply(self, func, convert_dtype=True, args=(), **kwargs): + """Apply function to Series values.""" + + def map(self, arg, na_action=None): + """Map values using input mapping or function.""" + + def groupby(self, by=None, axis=0, level=None, as_index=True, sort=True, group_keys=True, squeeze=False, observed=False, dropna=True): + """Group Series by values.""" + + def value_counts(self, normalize=False, sort=True, ascending=False, bins=None, dropna=True): + """Count unique values.""" + + def unique(self): + """Return unique values.""" + + def nunique(self, dropna=True): + """Count number of unique values.""" + + def mean(self, axis=None, skipna=True, level=None, numeric_only=None): + """Return mean of values.""" + + def median(self, axis=None, skipna=True, level=None, numeric_only=None): + """Return median of values.""" + + def std(self, axis=None, skipna=True, level=None, ddof=1, numeric_only=None): + """Return standard deviation.""" + + def var(self, axis=None, skipna=True, level=None, ddof=1, numeric_only=None): + """Return variance.""" + + def sum(self, axis=None, skipna=True, level=None, numeric_only=None, min_count=0): + """Return sum of values.""" + + def min(self, axis=None, skipna=True, level=None, numeric_only=None): + """Return minimum value.""" + + def max(self, axis=None, skipna=True, level=None, numeric_only=None): + """Return maximum value.""" + + def count(self, level=None): + """Count non-missing values.""" + + def to_dict(self, into=dict): + """Convert Series to dictionary.""" + + def to_list(self): + """Convert Series to list.""" + + def to_numpy(self, dtype=None, copy=False, na_value=None): + """Convert Series to NumPy array.""" +``` + +### Index + +Immutable sequence used for indexing and alignment in pandas data 
structures. + +```python { .api } +class Index: + def __init__(self, data=None, dtype=None, copy=False, name=None, tupleize_cols=True): + """ + Immutable sequence used for indexing and alignment. + + Parameters: + - data: array-like, sequence of labels + - dtype: data type for the index + - copy: bool, copy input data + - name: name for the Index + """ + + def shape(self): + """Return tuple of shape.""" + + def size(self): + """Return number of elements.""" + + def dtype(self): + """Data type of Index.""" + + def name(self): + """Name of Index.""" + + def names(self): + """Names of levels (for MultiIndex).""" + + def values(self): + """NumPy representation of Index.""" + + def empty(self): + """True if Index is empty.""" + + def copy(self, name=None, deep=False): + """Make a copy of Index.""" + + def astype(self, dtype, copy=True): + """Cast Index to specified dtype.""" + + def sort_values(self, return_indexer=False, ascending=True, na_position='last', key=None): + """Sort Index values.""" + + def drop(self, labels, errors='raise'): + """Drop specified labels from Index.""" + + def drop_duplicates(self, keep='first'): + """Remove duplicate values.""" + + def dropna(self, how='any'): + """Remove missing values.""" + + def fillna(self, value=None, downcast=None): + """Fill missing values.""" + + def unique(self, level=None): + """Return unique values.""" + + def nunique(self, dropna=True): + """Count number of unique values.""" + + def value_counts(self, normalize=False, sort=True, ascending=False, bins=None, dropna=True): + """Count unique values.""" + + def to_list(self): + """Convert Index to list.""" + + def to_numpy(self, dtype=None, copy=False, na_value=None): + """Convert Index to NumPy array.""" + + def to_series(self, index=None, name=None): + """Convert Index to Series.""" +``` + +### Specialized Index Types + +```python { .api } +class RangeIndex(Index): + """Immutable Index implementing a monotonic integer range.""" + def __init__(self, start=None, stop=None, step=None, dtype=None, copy=False, name=None): ... + +class CategoricalIndex(Index): + """Index based on an underlying Categorical.""" + def __init__(self, data=None, categories=None, ordered=None, dtype=None, copy=False, name=None): ... + +class MultiIndex(Index): + """Multi-level or hierarchical index object.""" + def __init__(self, levels=None, codes=None, sortorder=None, names=None, dtype=None, copy=False, name=None, verify_integrity=True): ... + +class IntervalIndex(Index): + """Index for intervals that are closed on the same side.""" + def __init__(self, data, closed=None, dtype=None, copy=False, name=None, verify_integrity=True): ... + +class DatetimeIndex(Index): + """Index for datetime64 data.""" + def __init__(self, data=None, freq=None, tz=None, normalize=False, closed=None, ambiguous='raise', dayfirst=False, yearfirst=False, dtype=None, copy=False, name=None): ... + +class TimedeltaIndex(Index): + """Index for timedelta64 data.""" + def __init__(self, data=None, unit=None, freq=None, closed=None, dtype=None, copy=False, name=None): ... + +class PeriodIndex(Index): + """Index for Period data.""" + def __init__(self, data=None, ordinal=None, freq=None, dtype=None, copy=False, name=None): ... 
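+
+# Note: MultiIndex objects are usually constructed with the classmethods
+# MultiIndex.from_arrays, MultiIndex.from_tuples, or MultiIndex.from_product
+# rather than the constructor shown above.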
+``` + +## Types + +```python { .api } +# Index slicing helper +IndexSlice: object # Slicing helper for MultiIndex + +# Grouper for groupby operations +class Grouper: + def __init__(self, key=None, level=None, freq=None, axis=0, sort=False, closed=None, label=None, how='mean', fill_method=None, limit=None, group_keys=True, origin='start_day', offset=None, dropna=True): ... + +# Named aggregation helper +class NamedAgg: + def __init__(self, column, aggfunc): ... + +# Flags for pandas objects +class Flags: + allows_duplicate_labels: bool +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-pandas/docs/data-io.md b/.tessl/tiles/tessl/pypi-pandas/docs/data-io.md new file mode 100644 index 0000000..ca340ab --- /dev/null +++ b/.tessl/tiles/tessl/pypi-pandas/docs/data-io.md @@ -0,0 +1,463 @@ +# Data Input/Output + +Comprehensive I/O capabilities for reading and writing data in various formats including CSV, Excel, JSON, SQL databases, HDF5, Parquet, and many statistical file formats. + +## Core Imports + +```python +import pandas as pd +from pandas import read_csv, read_excel, read_json, read_sql +``` + +## Capabilities + +### CSV and Text Files + +Read and write comma-separated values and other delimited text files. + +```python { .api } +def read_csv(filepath_or_buffer, sep=',', delimiter=None, header='infer', names=None, index_col=None, usecols=None, dtype=None, engine=None, converters=None, true_values=None, false_values=None, skipinitialspace=False, skiprows=None, skipfooter=0, nrows=None, na_values=None, keep_default_na=True, na_filter=True, verbose=False, skip_blank_lines=True, parse_dates=False, infer_datetime_format=False, keep_date_col=False, date_parser=None, date_format=None, dayfirst=False, cache_dates=True, iterator=False, chunksize=None, compression='infer', thousands=None, decimal='.', lineterminator=None, quotechar='"', quoting=0, doublequote=True, escapechar=None, comment=None, encoding=None, encoding_errors='strict', dialect=None, on_bad_lines='error', delim_whitespace=False, low_memory=True, memory_map=False, float_precision=None, storage_options=None, dtype_backend='numpy_nullable'): + """ + Read a comma-separated values (CSV) file into DataFrame. 
+ + Parameters: + - filepath_or_buffer: str, path object, or file-like object + - sep: str, delimiter to use + - header: int, list of int, default 'infer', row(s) to use as column names + - names: array-like, list of column names to use + - index_col: int, str, sequence of int/str, or False, column(s) to use as row labels + - usecols: list-like or callable, return subset of columns + - dtype: type name or dict of column -> type, data type for data or columns + - na_values: scalar, str, list-like, or dict, additional strings to recognize as NA/NaN + - parse_dates: bool or list of int or names or list of lists or dict + - chunksize: int, return TextFileReader object for iteration + + Returns: + DataFrame or TextFileReader + """ + +def read_table(filepath_or_buffer, sep='\\t', delimiter=None, header='infer', names=None, index_col=None, usecols=None, dtype=None, engine=None, converters=None, true_values=None, false_values=None, skipinitialspace=False, skiprows=None, skipfooter=0, nrows=None, na_values=None, keep_default_na=True, na_filter=True, verbose=False, skip_blank_lines=True, parse_dates=False, infer_datetime_format=False, keep_date_col=False, date_parser=None, date_format=None, dayfirst=False, cache_dates=True, iterator=False, chunksize=None, compression='infer', thousands=None, decimal='.', lineterminator=None, quotechar='"', quoting=0, doublequote=True, escapechar=None, comment=None, encoding=None, encoding_errors='strict', dialect=None, on_bad_lines='error', delim_whitespace=False, low_memory=True, memory_map=False, float_precision=None, storage_options=None, dtype_backend='numpy_nullable'): + """ + Read general delimited file into DataFrame. + + Similar to read_csv but with tab delimiter by default. + """ + +def read_fwf(filepath_or_buffer, colspecs='infer', widths=None, infer_nrows=100, dtype_backend='numpy_nullable', iterator=False, chunksize=None, **kwargs): + """ + Read a table of fixed-width formatted lines into DataFrame. + + Parameters: + - filepath_or_buffer: str, path object, or file-like object + - colspecs: list of tuple (int, int) or 'infer', column specifications + - widths: list of int, width of each field + - infer_nrows: int, number of rows to consider when letting the parser determine colspecs + + Returns: + DataFrame or TextFileReader + """ +``` + +### Excel Files + +Read and write Microsoft Excel files (.xlsx, .xls). + +```python { .api } +def read_excel(io, sheet_name=0, header=0, names=None, index_col=None, usecols=None, dtype=None, engine=None, converters=None, true_values=None, false_values=None, skiprows=None, nrows=None, na_values=None, keep_default_na=True, na_filter=True, verbose=False, parse_dates=False, date_parser=None, date_format=None, thousands=None, decimal='.', comment=None, skipfooter=0, storage_options=None, dtype_backend='numpy_nullable', engine_kwargs=None): + """ + Read an Excel file into a pandas DataFrame. 
+ + Parameters: + - io: str, bytes, ExcelFile, xlrd.Book, path object, or file-like object + - sheet_name: str, int, list, or None, names of sheets or sheet positions to read + - header: int, list of int, default 0, row(s) to use as column names + - names: array-like, list of column names to use + - index_col: int, str, list of int, default None, column(s) to use as row labels + - usecols: str, list-like, or callable, return subset of columns + - dtype: type name or dict of column -> type, data type for data or columns + - skiprows: list-like, rows to skip at the beginning + - nrows: int, number of rows to parse + + Returns: + DataFrame or dict of DataFrames + """ + +class ExcelFile: + def __init__(self, path_or_buffer, engine=None, storage_options=None, engine_kwargs=None): + """ + Class for parsing tabular Excel sheets into DataFrame objects. + + Parameters: + - path_or_buffer: str, bytes, path object, or file-like object + - engine: str, engine to use for reading ('openpyxl', 'xlrd', 'odf', 'pyxlsb') + """ + + def parse(self, sheet_name=0, header=0, names=None, index_col=None, usecols=None, converters=None, dtype=None, true_values=None, false_values=None, skiprows=None, nrows=None, na_values=None, verbose=False, parse_dates=False, date_parser=None, thousands=None, comment=None, skipfooter=0, convert_float=None, mangle_dupe_cols=True, dtype_backend='numpy_nullable', **kwds): + """Parse specified sheet(s) into DataFrame.""" + + def sheet_names(self): + """Property returning list of sheet names.""" + +class ExcelWriter: + def __init__(self, path, engine=None, date_format=None, datetime_format=None, mode='w', storage_options=None, if_sheet_exists=None, engine_kwargs=None): + """ + Class for writing DataFrame objects into Excel sheets. + + Parameters: + - path: str or file-like object, file path or existing ExcelWriter + - engine: str, engine to use for writing ('openpyxl', 'xlsxwriter') + - mode: str, file mode to use (write or append) + """ + + def close(self): + """Close the contained workbook.""" + + def save(self): + """Save workbook to disk.""" +``` + +### JSON Files + +Read and write JavaScript Object Notation (JSON) format. + +```python { .api } +def read_json(path_or_buf=None, orient=None, typ='frame', dtype=None, convert_axes=None, convert_dates=True, keep_default_dates=True, precise_float=False, date_unit=None, encoding=None, encoding_errors='strict', lines=False, chunksize=None, compression='infer', nrows=None, storage_options=None, dtype_backend='numpy_nullable', engine='ujson'): + """ + Convert a JSON string to pandas object. + + Parameters: + - path_or_buf: str, path object, file-like object, or None + - orient: str, indication of expected JSON string format + - typ: str, type of object to recover ('frame' or 'series') + - dtype: bool or dict, data type for data or columns + - convert_dates: bool or list of str, dates to parse + - lines: bool, read file as one JSON object per line + - chunksize: int, return JsonReader object for iteration + + Returns: + Series, DataFrame, or JsonReader + """ + +def json_normalize(data, record_path=None, meta=None, meta_prefix=None, record_prefix=None, errors='raise', sep='.', max_level=None): + """ + Normalize semi-structured JSON data into a flat table. 
+ + Parameters: + - data: dict or list of dicts, unserialized JSON objects + - record_path: str or list of str, path in each object to list of records + - meta: list of str, fields to use as metadata for each record + - sep: str, nested record separator + - max_level: int, max number of levels to normalize + + Returns: + DataFrame + """ +``` + +### SQL Databases + +Read and write data from SQL databases. + +```python { .api } +def read_sql(sql, con, index_col=None, coerce_float=True, params=None, parse_dates=None, columns=None, chunksize=None, dtype_backend='numpy_nullable', dtype=None): + """ + Read SQL query or database table into a DataFrame. + + Parameters: + - sql: str or SQLAlchemy Selectable, SQL query or table name + - con: ADBC Connection, SQLAlchemy connectable, str, or sqlite3 connection + - index_col: str or list of str, column(s) to use as row labels + - coerce_float: bool, attempts to convert values of non-string, non-numeric objects to floating point + - params: list, tuple, mapping or None, parameters to pass to execute method + - parse_dates: list or dict, columns to parse as dates + - chunksize: int, number of rows to include in each chunk + + Returns: + DataFrame or Iterator[DataFrame] + """ + +def read_sql_query(sql, con, index_col=None, coerce_float=True, params=None, parse_dates=None, chunksize=None, dtype=None, dtype_backend='numpy_nullable'): + """ + Read SQL query into a DataFrame. + + Parameters: + - sql: str or SQLAlchemy Selectable, SQL query to be executed + - con: ADBC Connection, SQLAlchemy connectable, str, or sqlite3 connection + + Returns: + DataFrame or Iterator[DataFrame] + """ + +def read_sql_table(table_name, con, schema=None, index_col=None, coerce_float=True, parse_dates=None, columns=None, chunksize=None, dtype_backend='numpy_nullable'): + """ + Read SQL database table into a DataFrame. + + Parameters: + - table_name: str, name of SQL table in database + - con: ADBC Connection, SQLAlchemy connectable, str, or sqlite3 connection + - schema: str, name of SQL schema in database to query + - columns: list, list of column names to select from SQL table + + Returns: + DataFrame or Iterator[DataFrame] + """ +``` + +### Binary Formats + +Read and write binary file formats for efficient storage. + +```python { .api } +def read_pickle(filepath_or_buffer, compression='infer', storage_options=None): + """ + Load pickled pandas object (or any object) from file. + + Parameters: + - filepath_or_buffer: str, path object, or file-like object + - compression: str or dict, compression type and options + + Returns: + unpickled object + """ + +def to_pickle(obj, filepath_or_buffer, compression='infer', protocol=5, storage_options=None): + """ + Pickle (serialize) object to file. + + Parameters: + - obj: any object, object to pickle + - filepath_or_buffer: str, path object, or file-like object + - compression: str or dict, compression type and options + - protocol: int, pickle protocol to use + """ + +def read_hdf(path_or_buf, key=None, mode='r', errors='strict', where=None, start=None, stop=None, columns=None, iterator=False, chunksize=None, dtype_backend='numpy_nullable', **kwargs): + """ + Read from the store, close it if we opened it. 
+ + Parameters: + - path_or_buf: str, path object, pandas.HDFStore, or file-like object + - key: str, identifier for group in store + - mode: str, mode to open file + - where: list of Term, criteria to select + - start: int, row number to start selection + - stop: int, row number to stop selection + - columns: list, list of columns to return + + Returns: + DataFrame or Series + """ + +class HDFStore: + def __init__(self, path, mode='r', complevel=None, complib=None, fletcher32=False, **kwargs): + """ + Dict-like IO interface for storing pandas objects in PyTables. + + Parameters: + - path: str, file path to HDF5 file + - mode: str, mode to open file + - complevel: int, compression level (0-9) + - complib: str, compression library + """ + + def put(self, key, value, format=None, index=True, append=False, complib=None, complevel=None, min_itemsize=None, nan_rep=None, data_columns=None, encoding=None, errors='strict', track_times=True, dropna=False): + """Store object in HDFStore.""" + + def get(self, key): + """Retrieve pandas object stored in file.""" + + def keys(self): + """Return list of keys in the store.""" + + def close(self): + """Close the store.""" + +def read_parquet(path, engine='auto', columns=None, storage_options=None, use_nullable_dtypes=False, dtype_backend='numpy_nullable', filesystem=None, filters=None, **kwargs): + """ + Load a parquet object, returning a DataFrame. + + Parameters: + - path: str, path object, or file-like object + - engine: str, parquet library to use ('auto', 'pyarrow', 'fastparquet') + - columns: list, columns to read + - filters: list of tuples, row group filters + + Returns: + DataFrame + """ + +def read_feather(path, columns=None, use_threads=True, storage_options=None, dtype_backend='numpy_nullable'): + """ + Load a feather-format object into a DataFrame. + + Parameters: + - path: str, path object, or file-like object + - columns: sequence, columns to read + - use_threads: bool, whether to parallelize reading + + Returns: + DataFrame + """ + +def read_orc(path, columns=None, dtype_backend='numpy_nullable', filesystem=None, **kwargs): + """ + Load an ORC object, returning a DataFrame. + + Parameters: + - path: str, path object, or file-like object + - columns: list, columns to read + + Returns: + DataFrame + """ +``` + +### Web and Markup + +Read data from web sources and markup formats. + +```python { .api } +def read_html(io, match='.+', header=None, index_col=None, skiprows=None, attrs=None, parse_dates=False, thousands=',', encoding=None, decimal='.', converters=None, na_values=None, keep_default_na=True, displayed_only=True, extract_links=None, dtype_backend='numpy_nullable', storage_options=None): + """ + Read HTML tables into a list of DataFrame objects. + + Parameters: + - io: str, path object, file-like object, or raw string containing HTML + - match: str or compiled regex, set of table attributes to match + - header: int or list-like, row(s) to use to make column headers + - index_col: int or list-like, column(s) to use to make row index + - skiprows: int, list-like or slice, rows to skip + - attrs: dict, attributes to match in table tag + + Returns: + list of DataFrames + """ + +def read_xml(path_or_buffer, xpath='./*', namespaces=None, elems_only=False, attrs_only=False, names=None, dtype=None, converters=None, parse_dates=None, encoding='utf-8', parser='lxml', tree_builder=None, stylesheet=None, iterparse=None, compression='infer', storage_options=None, dtype_backend='numpy_nullable'): + """ + Read XML document into a DataFrame object. 
+ + Parameters: + - path_or_buffer: str, path object, or file-like object + - xpath: str, XPath expression to parse desired element(s) + - namespaces: dict, namespace prefixes and URIs + - elems_only: bool, parse child elements only + - attrs_only: bool, parse attributes only + - encoding: str, encoding of document + + Returns: + DataFrame + """ + +def read_clipboard(sep='\\s+', dtype_backend='numpy_nullable', **kwargs): + """ + Read text from clipboard and pass to read_csv. + + Parameters: + - sep: str, delimiter for splitting clipboard contents + + Returns: + DataFrame + """ +``` + +### Statistical Package Formats + +Read data from statistical software packages. + +```python { .api } +def read_stata(filepath_or_buffer, convert_dates=True, convert_categoricals=True, encoding=None, index_col=None, convert_missing=False, preserve_dtypes=True, columns=None, order_categoricals=True, chunksize=None, iterator=False, compression='infer', storage_options=None, dtype_backend='numpy_nullable'): + """ + Read Stata file into DataFrame. + + Parameters: + - filepath_or_buffer: str, path object, or file-like object + - convert_dates: bool, convert date variables to pandas datetime + - convert_categoricals: bool, convert categorical variables to pandas Categorical + - encoding: str, encoding used to decode text strings + - preserve_dtypes: bool, preserve Stata data types + + Returns: + DataFrame or StataReader + """ + +def read_sas(filepath_or_buffer, format=None, index=None, encoding=None, chunksize=None, iterator=False, compression='infer', storage_options=None, dtype_backend='numpy_nullable'): + """ + Read SAS files stored as either XPORT or SAS7BDAT format files. + + Parameters: + - filepath_or_buffer: str, path object, or file-like object + - format: str, file format ('xport' or 'sas7bdat') + - encoding: str, encoding for text data + - chunksize: int, rows to read at a time + + Returns: + DataFrame or SAS Reader + """ + +def read_spss(path, usecols=None, convert_categoricals=True, dtype_backend='numpy_nullable', storage_options=None): + """ + Load an SPSS file from the file path, returning a DataFrame. + + Parameters: + - path: str, path object, or file-like object + - usecols: list-like, return subset of columns + - convert_categoricals: bool, convert categorical variables to pandas Categorical + + Returns: + DataFrame + """ +``` + +### Google BigQuery + +Read data from Google BigQuery. + +```python { .api } +def read_gbq(query, project_id=None, index_col=None, col_order=None, reauth=False, auth_local_webserver=True, dialect=None, location=None, configuration=None, credentials=None, use_bqstorage_api=None, max_results=None, progress_bar_type=None, dtype_backend='numpy_nullable'): + """ + Load data from Google BigQuery. + + Parameters: + - query: str, SQL-Like Query to return data values + - project_id: str, Google BigQuery Account project ID + - index_col: str, name of result column to use for index + - col_order: list(str), list of BigQuery column names in desired order + - reauth: bool, force Google BigQuery to re-authenticate user + - dialect: str, SQL dialect for BigQuery ('legacy' or 'standard') + + Returns: + DataFrame + """ +``` + +## Types + +```python { .api } +# File reader classes for chunked reading +class TextFileReader: + """Iterator for reading CSV files in chunks.""" + def __iter__(self): ... + def __next__(self): ... + def get_chunk(self, size=None): ... + def close(self): ... + +class JsonReader: + """Iterator for reading JSON files in chunks.""" + def __iter__(self): ... 
+ def __next__(self): ... + def close(self): ... + +# Storage format constants +class ExcelWriterMode: + WRITE = 'w' + APPEND = 'a' + +class JSONOrient: + SPLIT = 'split' + RECORDS = 'records' + INDEX = 'index' + COLUMNS = 'columns' + VALUES = 'values' + TABLE = 'table' +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-pandas/docs/data-manipulation.md b/.tessl/tiles/tessl/pypi-pandas/docs/data-manipulation.md new file mode 100644 index 0000000..d14ba3b --- /dev/null +++ b/.tessl/tiles/tessl/pypi-pandas/docs/data-manipulation.md @@ -0,0 +1,370 @@ +# Data Manipulation and Reshaping + +Functions for combining, reshaping, and transforming data including merging, concatenation, pivoting, melting, and advanced data restructuring operations. + +## Core Imports + +```python +import pandas as pd +from pandas import concat, merge, pivot_table, melt +``` + +## Capabilities + +### Combining Data + +Functions to combine multiple DataFrames or Series through concatenation and merging operations. + +```python { .api } +def concat(objs, axis=0, join='outer', ignore_index=False, keys=None, levels=None, names=None, verify_integrity=False, sort=False, copy=True): + """ + Concatenate pandas objects along a particular axis. + + Parameters: + - objs: sequence or mapping of Series or DataFrame objects + - axis: int, axis to concatenate along (0='index', 1='columns') + - join: str, how to handle indexes on other axis ('inner'/'outer') + - ignore_index: bool, do not use index values along concatenation axis + - keys: sequence, construct hierarchical index using passed keys + - levels: list of sequences, specific levels to use for constructing MultiIndex + - names: list, names for levels in hierarchical index + - verify_integrity: bool, check whether new concatenated axis contains duplicates + - sort: bool, sort non-concatenation axis if not already aligned + + Returns: + object, type of objects being concatenated + """ + +def merge(left, right, how='inner', on=None, left_on=None, right_on=None, left_index=False, right_index=False, sort=False, suffixes=('_x', '_y'), copy=True, indicator=False, validate=None): + """ + Merge DataFrame or named Series objects with a database-style join. + + Parameters: + - left: DataFrame or named Series + - right: DataFrame or named Series + - how: str, type of merge ('left', 'right', 'outer', 'inner', 'cross') + - on: label or list, column names to join on + - left_on: label or list, left DataFrame column names to join on + - right_on: label or list, right DataFrame column names to join on + - left_index: bool, use left DataFrame index as join key + - right_index: bool, use right DataFrame index as join key + - sort: bool, sort join keys lexicographically + - suffixes: list-like, suffix to apply to overlapping column names + - indicator: bool or str, add column to output indicating source of each row + - validate: str, validate uniqueness of merge keys + + Returns: + DataFrame + """ + +def merge_asof(left, right, on=None, left_on=None, right_on=None, left_index=False, right_index=False, by=None, left_by=None, right_by=None, suffixes=('_x', '_y'), tolerance=None, allow_exact_matches=True, direction='backward'): + """ + Perform a merge by key distance. 
+ + Parameters: + - left: DataFrame or named Series + - right: DataFrame or named Series + - on: label, column name to merge on (must be sorted) + - by: column name or list of column names, match on these columns before searching + - tolerance: int or Timedelta, select closest key within this distance + - allow_exact_matches: bool, allow matching with exact same key + - direction: str, search direction ('backward', 'forward', 'nearest') + + Returns: + DataFrame + """ + +def merge_ordered(left, right, on=None, left_on=None, right_on=None, left_by=None, right_by=None, fill_method=None, suffixes=('_x', '_y'), how='outer'): + """ + Perform merge with optional filling/interpolation. + + Parameters: + - left: DataFrame or named Series + - right: DataFrame or named Series + - fill_method: str, interpolation method ('ffill') + - how: str, type of merge ('left', 'right', 'outer', 'inner') + + Returns: + DataFrame + """ +``` + +### Reshaping Data + +Functions to reshape data between wide and long formats, create pivot tables, and restructure DataFrames. + +```python { .api } +def pivot(data, index=None, columns=None, values=None): + """ + Return reshaped DataFrame organized by given index/column values. + + Parameters: + - data: DataFrame + - index: column to use to make new frame's index + - columns: column to use to make new frame's columns + - values: column(s) to use for populating new frame's values + + Returns: + DataFrame + """ + +def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', fill_value=None, margins=False, dropna=True, margins_name='All', observed=False, sort=True): + """ + Create a spreadsheet-style pivot table as a DataFrame. + + Parameters: + - data: DataFrame + - values: column to aggregate + - index: column, Grouper, array, list of columns to use as index + - columns: column, Grouper, array, list of columns to use as columns + - aggfunc: function, function to use for aggregation ('mean', 'sum', 'count', etc.) + - fill_value: scalar, value to replace missing values + - margins: bool, add row/column margins (subtotals) + - dropna: bool, do not include columns with all NaN values + - margins_name: str, name of row/column containing totals + - observed: bool, for categorical columns, consider only observed categories + + Returns: + DataFrame + """ + +def melt(data, id_vars=None, value_vars=None, var_name=None, value_name='value', col_level=None, ignore_index=True): + """ + Unpivot a DataFrame from wide to long format. + + Parameters: + - data: DataFrame + - id_vars: column(s) to use as identifier variables + - value_vars: column(s) to unpivot, defaults to all columns not in id_vars + - var_name: str, name to use for variable column + - value_name: str, name to use for value column + - col_level: int or str, level in columns to melt + - ignore_index: bool, ignore index in result + + Returns: + DataFrame + """ + +def wide_to_long(df, stubnames, i, j, sep='', suffix='\\d+'): + """ + Pivot a wide table to long (stacked) format. + + Parameters: + - df: DataFrame + - stubnames: str or list, stub name(s) + - i: column(s) to use as id variable(s) + - j: str, suffix of wide variables + - sep: str, separator between stub names and suffix + - suffix: str, regular expression for suffix + + Returns: + DataFrame + """ + +def lreshape(data, groups, dropna=True): + """ + Reshape wide-format data to long. 
+ + Parameters: + - data: DataFrame + - groups: dict, mapping of column names to group labels + - dropna: bool, drop rows containing missing values + + Returns: + DataFrame + """ +``` + +### Categorical Data + +Functions for working with categorical data and creating dummy variables. + +```python { .api } +def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False, columns=None, sparse=False, drop_first=False, dtype=None): + """ + Convert categorical variable(s) into dummy/indicator variables. + + Parameters: + - data: array-like, Series, or DataFrame + - prefix: str, list of str, or dict of str, string to append DataFrame column names + - prefix_sep: str, separator/delimiter to use when adding prefix + - dummy_na: bool, add column to indicate NaNs + - columns: list-like, column names in DataFrame to encode + - sparse: bool, return SparseArray (True) or NumPy array (False) + - drop_first: bool, remove first level of categorical variable + - dtype: dtype, data type for new columns + + Returns: + DataFrame + """ + +def from_dummies(data, sep=None, default_category=None): + """ + Create a categorical DataFrame from a DataFrame of dummy variables. + + Parameters: + - data: DataFrame, data of which to get dummy indicators + - sep: str, separator used in column names of dummy DataFrame + - default_category: None, str, or dict of str, name of column containing default category + + Returns: + DataFrame + """ + +def crosstab(index, columns, values=None, rownames=None, colnames=None, aggfunc=None, margins=False, margins_name='All', dropna=True, normalize=False): + """ + Compute a simple cross-tabulation of two (or more) factors. + + Parameters: + - index: array-like, values to group by in rows + - columns: array-like, values to group by in columns + - values: array-like, array of values to aggregate according to factors + - rownames: sequence, names for row index + - colnames: sequence, names for column index + - aggfunc: function, aggregation function to use + - margins: bool, add row/column margins (subtotals) + - dropna: bool, do not include columns with all NaN values + - normalize: bool, normalize by dividing all values by sum + + Returns: + DataFrame + """ +``` + +### Binning and Discretization + +Functions to bin continuous data into discrete intervals or quantile-based groups. + +```python { .api } +def cut(x, bins, right=True, labels=None, retbins=False, precision=3, include_lowest=False, duplicates='raise', ordered=True): + """ + Bin values into discrete intervals. + + Parameters: + - x: array-like, input array to be binned + - bins: int, sequence of scalars, or IntervalIndex, criteria to bin by + - right: bool, indicates whether bins include rightmost edge + - labels: array or bool, specifies labels for returned bins + - retbins: bool, return bins or not + - precision: int, precision at which to store and display bins labels + - include_lowest: bool, whether first interval should be left-inclusive + - duplicates: str, behavior when bin edges are not unique ('raise' or 'drop') + - ordered: bool, whether returned Categorical will be ordered + + Returns: + Categorical, Series, or array of intervals + """ + +def qcut(x, q, labels=None, retbins=False, precision=3, duplicates='raise'): + """ + Quantile-based discretization function. 
+ + Parameters: + - x: array-like, input array to be binned + - q: int or list-like of float, number of quantiles or quantile boundaries + - labels: array or bool, used as labels for resulting bins + - retbins: bool, return (bins, labels) or not + - precision: int, precision at which to store and display bins labels + - duplicates: str, behavior when bin edges are not unique ('raise' or 'drop') + + Returns: + Categorical, Series, or array of intervals + """ +``` + +### DataFrame and Series Transformation + +Core methods for transforming and manipulating individual DataFrames and Series. + +```python { .api } +# DataFrame transformation methods (already covered in core-data-structures.md) +# These are methods of DataFrame/Series classes: + +# DataFrame.pivot(index=None, columns=None, values=None) +# DataFrame.transpose() / DataFrame.T +# DataFrame.stack(level=-1, dropna=True) +# DataFrame.unstack(level=-1, fill_value=None) +# DataFrame.explode(column, ignore_index=False) +# Series.explode(ignore_index=False) + +# Additional utility functions +def eval(expr, parser='pandas', engine=None, local_dict=None, global_dict=None, resolvers=(), level=0, target=None, inplace=False): + """ + Evaluate a Python expression as a string using pandas objects. + + Parameters: + - expr: str, expression to evaluate + - parser: str, parser to use ('pandas' or 'python') + - engine: str, engine to use ('python', 'numexpr') + - local_dict: dict, local variable scope + - global_dict: dict, global variable scope + - level: int, number of prior stack frames to traverse + - target: object, assign result to this variable + - inplace: bool, perform operation in-place + + Returns: + ndarray, numeric scalar, DataFrame, Series + """ +``` + +## Advanced Reshaping Patterns + +### MultiIndex Operations + +```python { .api } +# MultiIndex creation and manipulation +def MultiIndex.from_arrays(arrays, sortorder=None, names=None): + """Create MultiIndex from arrays.""" + +def MultiIndex.from_tuples(tuples, sortorder=None, names=None): + """Create MultiIndex from list of tuples.""" + +def MultiIndex.from_product(iterables, sortorder=None, names=None): + """Create MultiIndex from cartesian product of iterables.""" + +def MultiIndex.from_frame(df, sortorder=None, names=None): + """Create MultiIndex from DataFrame.""" + +# These are methods of DataFrames with MultiIndex: +# df.stack() - pivot columns to rows (wide to long) +# df.unstack() - pivot rows to columns (long to wide) +# df.swaplevel() - swap levels in MultiIndex +# df.reorder_levels() - rearrange index levels +``` + +### Advanced Merging + +```python { .api } +# Additional merge validation options +class MergeValidation: + ONE_TO_ONE = '1:1' + ONE_TO_MANY = '1:m' + MANY_TO_ONE = 'm:1' + MANY_TO_MANY = 'm:m' + +# Join methods (these are DataFrame methods) +# df.join(other, on=None, how='left', lsuffix='', rsuffix='', sort=False) +# df.combine(other, func, fill_value=None, overwrite=True) +# df.combine_first(other) - combine with other, using non-null values from calling DataFrame +# df.update(other, join='left', overwrite=True, filter_func=None, errors='ignore') +``` + +## Types + +```python { .api } +# Merge and join types +MergeHow = Literal['left', 'right', 'outer', 'inner', 'cross'] +JoinHow = Literal['left', 'right', 'outer', 'inner'] + +# Pivot aggregation functions +AggFunc = Union[str, Callable, List[Union[str, Callable]], Dict[str, Union[str, Callable]]] + +# Binning edge behavior +BinEdge = Literal['left', 'right'] + +# Categorical ordering +CategoricalOrdered = 
bool + +# Cross-tabulation normalization +NormalizeOptions = Union[bool, Literal['all', 'index', 'columns']] +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-pandas/docs/data-types.md b/.tessl/tiles/tessl/pypi-pandas/docs/data-types.md new file mode 100644 index 0000000..7598243 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-pandas/docs/data-types.md @@ -0,0 +1,632 @@ +# Data Types and Missing Data + +Extension data types, missing data handling, and type conversion utilities including nullable integer/boolean types, categorical data, and advanced missing value operations. + +## Core Imports + +```python +import pandas as pd +from pandas import isna, notna, Categorical, NA +``` + +## Capabilities + +### Missing Data Detection + +Functions to detect and handle missing values in pandas data structures. + +```python { .api } +def isna(obj): + """ + Detect missing values for an array-like object. + + Parameters: + - obj: scalar or array-like, object to check for null or missing values + + Returns: + bool or array-like of bool, boolean mask indicating missing values + """ + +def isnull(obj): + """ + Detect missing values for an array-like object. + + Alias for isna(). + + Parameters: + - obj: scalar or array-like, object to check for null or missing values + + Returns: + bool or array-like of bool, boolean mask indicating missing values + """ + +def notna(obj): + """ + Detect existing (non-missing) values. + + Parameters: + - obj: scalar or array-like, object to check for non-null values + + Returns: + bool or array-like of bool, boolean mask indicating non-missing values + """ + +def notnull(obj): + """ + Detect existing (non-missing) values. + + Alias for notna(). + + Parameters: + - obj: scalar or array-like, object to check for non-null values + + Returns: + bool or array-like of bool, boolean mask indicating non-missing values + """ +``` + +### Categorical Data + +Categorical data type for efficient storage and computation of repetitive data. + +```python { .api } +class Categorical: + def __init__(self, values, categories=None, ordered=None, dtype=None, fastpath=False): + """ + Represent a categorical variable in classic R / S-plus fashion. 
+ + Parameters: + - values: list-like, values for the categorical + - categories: Index-like, unique categories for this categorical + - ordered: bool, whether categories have meaningful order + - dtype: CategoricalDtype, dtype for the categorical + """ + + def add_categories(self, new_categories, inplace=False): + """Add new categories.""" + + def remove_categories(self, removals, inplace=False): + """Remove categories.""" + + def rename_categories(self, new_categories, inplace=False): + """Rename categories.""" + + def reorder_categories(self, new_categories, ordered=None, inplace=False): + """Reorder categories.""" + + def remove_unused_categories(self, inplace=False): + """Remove categories not in use.""" + + def set_categories(self, new_categories, ordered=None, rename=False, inplace=False): + """Set categories to specified new_categories.""" + + def as_ordered(self, inplace=False): + """Set Categorical to be ordered.""" + + def as_unordered(self, inplace=False): + """Set Categorical to be unordered.""" + + @property + def categories(self): + """The categories of this categorical.""" + + @property + def ordered(self): + """Whether the categories have an ordered relationship.""" + + @property + def codes(self): + """The category codes of this categorical.""" + + def value_counts(self, sort=True, ascending=False, dropna=True): + """Return counts of each category.""" + +class CategoricalDtype: + def __init__(self, categories=None, ordered=None): + """ + Type for categorical data with categories and ordered attributes. + + Parameters: + - categories: sequence, categories for the dtype + - ordered: bool, whether the categories are ordered + """ + + @property + def categories(self): + """Categorical categories.""" + + @property + def ordered(self): + """Whether categories are ordered.""" +``` + +### Extension Data Types + +Specialized data types that extend pandas' capabilities beyond NumPy types. + +```python { .api } +class StringDtype: + def __init__(self, storage=None): + """ + Extension dtype for string data. + + Parameters: + - storage: str, storage type ('python' or 'pyarrow') + """ + +class BooleanDtype: + def __init__(self): + """Extension dtype for boolean data with missing value support.""" + +class Int8Dtype: + def __init__(self): + """Extension dtype for nullable 8-bit integer data.""" + +class Int16Dtype: + def __init__(self): + """Extension dtype for nullable 16-bit integer data.""" + +class Int32Dtype: + def __init__(self): + """Extension dtype for nullable 32-bit integer data.""" + +class Int64Dtype: + def __init__(self): + """Extension dtype for nullable 64-bit integer data.""" + +class UInt8Dtype: + def __init__(self): + """Extension dtype for nullable 8-bit unsigned integer data.""" + +class UInt16Dtype: + def __init__(self): + """Extension dtype for nullable 16-bit unsigned integer data.""" + +class UInt32Dtype: + def __init__(self): + """Extension dtype for nullable 32-bit unsigned integer data.""" + +class UInt64Dtype: + def __init__(self): + """Extension dtype for nullable 64-bit unsigned integer data.""" + +class Float32Dtype: + def __init__(self): + """Extension dtype for nullable 32-bit floating point data.""" + +class Float64Dtype: + def __init__(self): + """Extension dtype for nullable 64-bit floating point data.""" + +class PeriodDtype: + def __init__(self, freq=None): + """ + Extension dtype for Period data. 
+ + Parameters: + - freq: str or DateOffset, frequency of the Period + """ + +class IntervalDtype: + def __init__(self, subtype=None, closed=None): + """ + Extension dtype for Interval data. + + Parameters: + - subtype: str or numpy dtype, subtype of interval + - closed: str, whether intervals are closed ('left', 'right', 'both', 'neither') + """ + +class DatetimeTZDtype: + def __init__(self, tz=None, unit='ns'): + """ + Extension dtype for timezone-aware datetime data. + + Parameters: + - tz: str or tzinfo, timezone information + - unit: str, unit of precision ('ns', 'us', 'ms', 's') + """ + +class SparseDtype: + def __init__(self, dtype=numpy.float64, fill_value=None): + """ + Extension dtype for sparse data. + + Parameters: + - dtype: str, numpy.dtype, ExtensionDtype, the dtype of non-sparse values + - fill_value: scalar, value used for sparse locations + """ +``` + +### Arrow Integration + +Apache Arrow-backed data types for improved performance and interoperability. + +```python { .api } +class ArrowDtype: + def __init__(self, pyarrow_dtype): + """ + Extension dtype for PyArrow data types. + + Parameters: + - pyarrow_dtype: pyarrow.DataType, PyArrow data type + """ + + @property + def pyarrow_dtype(self): + """Return the PyArrow data type.""" + + @property + def name(self): + """Return the name of the data type.""" + + @property + def type(self): + """Return the scalar type for the array.""" +``` + +### Array Creation and Conversion + +Functions to create pandas arrays and convert between different array types. + +```python { .api } +def array(data, dtype=None, copy=True): + """ + Create an ExtensionArray from the input data. + + Parameters: + - data: Sequence, 1-dimensional list, Series, Index, or ExtensionArray + - dtype: str, np.dtype, or ExtensionDtype, dtype for the array + - copy: bool, whether to copy the data + + Returns: + ExtensionArray, newly created array + """ + +def factorize(values, sort=False, na_sentinel=-1, use_na_sentinel=True, size_hint=None): + """ + Encode the object as an enumerated type or categorical variable. + + Parameters: + - values: sequence, 1-d array-like + - sort: bool, sort uniques + - na_sentinel: int, value to mark missing values + - use_na_sentinel: bool, use na_sentinel for missing values + - size_hint: int, hint to the hashtable sizer + + Returns: + tuple of (codes, uniques) + """ + +def unique(values): + """ + Return unique values based on a hash table. + + Parameters: + - values: 1d array-like + + Returns: + ndarray or ExtensionArray, unique values + """ + +def value_counts(values, sort=True, ascending=False, normalize=False, bins=None, dropna=True): + """ + Compute a histogram of the 1D array values. + + Parameters: + - values: 1d array-like + - sort: bool, sort by values + - ascending: bool, sort in ascending order + - normalize: bool, return relative frequencies + - bins: int, rather than count values, group them into half-open bins + - dropna: bool, don't include counts of NaN + + Returns: + Series + """ +``` + +### Type Checking Functions + +Functions to check data types and properties of pandas objects. + +```python { .api } +# Available in pandas.api.types +def infer_dtype(value, skipna=True): + """ + Efficiently infer the type of a passed val. 
+ + Parameters: + - value: object, object whose type is to be inferred + - skipna: bool, ignore NaN values when inferring type + + Returns: + str, type of the object + """ + +def is_any_real_numeric_dtype(arr_or_dtype): + """Check whether the provided array or dtype is a real number data type.""" + +def is_bool_dtype(arr_or_dtype): + """Check whether the provided array or dtype is a boolean data type.""" + +def is_categorical_dtype(arr_or_dtype): + """Check whether the provided array or dtype is Categorical data type.""" + +def is_complex_dtype(arr_or_dtype): + """Check whether the provided array or dtype is a complex data type.""" + +def is_datetime64_any_dtype(arr_or_dtype): + """Check whether the provided array or dtype is datetime64 data type.""" + +def is_datetime64_dtype(arr_or_dtype): + """Check whether the provided array or dtype is datetime64[ns] data type.""" + +def is_datetime64_ns_dtype(arr_or_dtype): + """Check whether the provided array or dtype is datetime64[ns] data type.""" + +def is_datetime64tz_dtype(arr_or_dtype): + """Check whether the provided array or dtype has a timezone-aware datetime64 data type.""" + +def is_extension_array_dtype(arr_or_dtype): + """Check whether the provided array or dtype is an extension data type.""" + +def is_float_dtype(arr_or_dtype): + """Check whether the provided array or dtype is a float data type.""" + +def is_integer_dtype(arr_or_dtype): + """Check whether the provided array or dtype is an integer data type.""" + +def is_interval_dtype(arr_or_dtype): + """Check whether the provided array or dtype is Interval data type.""" + +def is_numeric_dtype(arr_or_dtype): + """Check whether the provided array or dtype is a numeric data type.""" + +def is_object_dtype(arr_or_dtype): + """Check whether the provided array or dtype is object data type.""" + +def is_period_dtype(arr_or_dtype): + """Check whether the provided array or dtype is Period data type.""" + +def is_signed_integer_dtype(arr_or_dtype): + """Check whether the provided array or dtype is a signed integer data type.""" + +def is_string_dtype(arr_or_dtype): + """Check whether the provided array or dtype is a string data type.""" + +def is_timedelta64_dtype(arr_or_dtype): + """Check whether the provided array or dtype is timedelta64 data type.""" + +def is_timedelta64_ns_dtype(arr_or_dtype): + """Check whether the provided array or dtype is timedelta64[ns] data type.""" + +def is_unsigned_integer_dtype(arr_or_dtype): + """Check whether the provided array or dtype is an unsigned integer data type.""" + +def pandas_dtype(dtype): + """ + Convert input into a pandas only dtype object or a numpy dtype object. + + Parameters: + - dtype: object to be converted + + Returns: + np.dtype or pandas dtype + """ +``` + +### Extension Arrays + +Specialized array classes that provide the foundation for extension data types. + +```python { .api } +class BooleanArray: + def __init__(self, values, mask, copy=False): + """ + Array of boolean (True/False) data with missing values. + + Parameters: + - values: numpy.ndarray, boolean array + - mask: numpy.ndarray, boolean array indicating missing values + - copy: bool, copy the input arrays + """ + +class IntegerArray: + def __init__(self, values, mask, copy=False): + """ + Array of integer values with missing value support. 
+ + Parameters: + - values: numpy.ndarray, integer array + - mask: numpy.ndarray, boolean array indicating missing values + - copy: bool, copy the input arrays + """ + +class FloatingArray: + def __init__(self, values, mask, copy=False): + """ + Array of floating point values with missing value support. + + Parameters: + - values: numpy.ndarray, float array + - mask: numpy.ndarray, boolean array indicating missing values + - copy: bool, copy the input arrays + """ + +class StringArray: + def __init__(self, values, copy=False): + """ + Extension array for string data in a pandas Series or DataFrame. + + Parameters: + - values: array-like, sequence of strings + - copy: bool, copy the input array + """ + +class IntervalArray: + def __init__(self, data, closed=None, dtype=None, copy=False, verify_integrity=True): + """ + Pandas array for interval data that are closed on the same side. + + Parameters: + - data: array-like (1-dimensional), array of Interval objects + - closed: str, whether intervals are closed ('left', 'right', 'both', 'neither') + - dtype: IntervalDtype, dtype for the IntervalArray + - copy: bool, copy the input data + - verify_integrity: bool, verify data integrity + """ + +class PeriodArray: + def __init__(self, values, dtype=None, freq=None, copy=False): + """ + Pandas array for storing Period data. + + Parameters: + - values: Union[PeriodArray, Series[period], ndarray[int], PeriodIndex] + - dtype: PeriodDtype, optional + - freq: str or period object, frequency + - copy: bool, copy the input data + """ + +class DatetimeArray: + def __init__(self, values, dtype=None, freq=None, copy=False): + """ + Pandas array for datetime64 data. + + Parameters: + - values: Series, Index, DatetimeArray, ndarray + - dtype: numpy.dtype or DatetimeTZDtype + - freq: str or Offset + - copy: bool, copy the input data + """ + +class TimedeltaArray: + def __init__(self, values, dtype=None, freq=None, copy=False): + """ + Pandas array for timedelta64 data. + + Parameters: + - values: array-like, sequence of timedelta-like objects + - dtype: numpy.dtype + - freq: str or Offset + - copy: bool, copy the input data + """ + +class SparseArray: + def __init__(self, data, sparse_index=None, fill_value=None, kind='integer', dtype=None, copy=False): + """ + An ExtensionArray for storing sparse data. + + Parameters: + - data: array-like or scalar + - sparse_index: SparseIndex, locations of non-fill_value entries + - fill_value: scalar, entries matching this value are omitted from representation + - kind: str, sparse index kind ('integer' or 'block') + - dtype: numpy.dtype + - copy: bool, copy the input data + """ +``` + +## Advanced Type Operations + +### Categorical Utilities + +```python { .api } +def union_categoricals(to_union, sort_categories=False, ignore_order=False): + """ + Combine list-like of Categorical-like into a single Categorical. + + Parameters: + - to_union: list-like, Categorical, CategoricalIndex, or Series with categorical dtype + - sort_categories: bool, sort resulting categories + - ignore_order: bool, ignore category order + + Returns: + Categorical + """ + +def concat_categoricals(to_concat, axis=0, join='outer', ignore_index=False): + """ + Concatenate Categoricals. 
+ + Parameters: + - to_concat: list of Categoricals + - axis: int, axis to concatenate along + - join: str, join method for categories + - ignore_index: bool, reset index in result + + Returns: + Categorical + """ +``` + +### Nullable Integer Construction + +```python { .api } +# Constructor functions for nullable integer arrays +def Int8Array(values, mask=None, copy=False): + """Construct Int8Array.""" + +def Int16Array(values, mask=None, copy=False): + """Construct Int16Array.""" + +def Int32Array(values, mask=None, copy=False): + """Construct Int32Array.""" + +def Int64Array(values, mask=None, copy=False): + """Construct Int64Array.""" + +def UInt8Array(values, mask=None, copy=False): + """Construct UInt8Array.""" + +def UInt16Array(values, mask=None, copy=False): + """Construct UInt16Array.""" + +def UInt32Array(values, mask=None, copy=False): + """Construct UInt32Array.""" + +def UInt64Array(values, mask=None, copy=False): + """Construct UInt64Array.""" +``` + +## Types + +```python { .api } +# Missing value sentinels +NA: object # Pandas missing value for extension dtypes +NaT: object # Not-a-Time for datetime/timedelta + +# Extension dtype base classes +class ExtensionDtype: + """Base class for custom data types.""" + + @property + def name(self): + """Return a string representation of the dtype.""" + + @property + def type(self): + """Return the scalar type for the array.""" + + @classmethod + def construct_from_string(cls, string): + """Construct this type from a string.""" + +# Categorical ordering +CategoricalOrdering = bool + +# Dtype inference results +InferredType = Literal[ + 'boolean', 'integer', 'floating', 'complex', 'string', 'unicode', + 'mixed', 'mixed-integer', 'mixed-integer-float', 'decimal', + 'datetime', 'datetime64', 'timedelta', 'timedelta64', + 'period', 'categorical', 'interval', 'bytes', 'empty' +] + +# Arrow dtype string representations +ArrowDtypeStr = str # PyArrow dtype string like 'int64[pyarrow]' + +# Sparse array kinds +SparseKind = Literal['integer', 'block'] +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-pandas/docs/errors.md b/.tessl/tiles/tessl/pypi-pandas/docs/errors.md new file mode 100644 index 0000000..fd7e14f --- /dev/null +++ b/.tessl/tiles/tessl/pypi-pandas/docs/errors.md @@ -0,0 +1,540 @@ +# Pandas Errors and Warnings + +The `pandas.errors` module provides comprehensive exception classes and warnings for error handling in pandas operations. These help developers identify and handle specific issues that can arise during data manipulation, file I/O, and analysis tasks. 
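+
+Most of these classes live in the `pandas.errors` namespace and behave like ordinary Python exceptions and warnings, so they can be caught or filtered with the standard tools. As a minimal sketch, guarding a CSV read against empty input looks like this (the input here is illustrative):
+
+```python
+from io import StringIO
+
+import pandas as pd
+
+try:
+    df = pd.read_csv(StringIO(""))  # empty input: no columns to parse
+except pd.errors.EmptyDataError as err:
+    print(f"Nothing to load: {err}")
+```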
+ +## Core Imports + +```python +import pandas as pd +from pandas import errors +from pandas.errors import ( + # Data Type Errors + IntCastingNaNError, + DtypeWarning, + + # Parsing and I/O Errors + ParserError, + ParserWarning, + EmptyDataError, + + # Index and Data Structure Errors + UnsortedIndexError, + InvalidIndexError, + IndexingError, + DuplicateLabelError, + + # Performance and Operation Warnings + PerformanceWarning, + SettingWithCopyWarning, + SettingWithCopyError, + ChainedAssignmentError, + + # Computation and Analysis Errors + DataError, + SpecificationError, + MergeError, + + # Frequency and Time Series Errors + NullFrequencyError, + OutOfBoundsDatetime, + OutOfBoundsTimedelta, + + # Engine and Backend Errors + NumbaUtilError, + NumExprClobberingError, + UndefinedVariableError, + UnsupportedFunctionCall, + + # File Format Specific Errors + DatabaseError, + PossibleDataLossError, + ClosedFileError, + PyperclipException, + PyperclipWindowsException, + + # Development and Internal Errors + AbstractMethodError, + InvalidComparison, + LossySetitemError, + NoBufferPresent, +) +``` + +## Data Type and Conversion Errors + +### Type Casting Errors + +**IntCastingNaNError** { .api } +```python +class IntCastingNaNError(ValueError) +``` +Exception raised when converting an array with NaN values to integer type using `astype()`. + +```python +# Example that raises IntCastingNaNError +import numpy as np +df = pd.DataFrame(np.array([[1, np.nan], [2, 3]]), dtype="i8") +# IntCastingNaNError: Cannot convert non-finite values (NA or inf) to integer +``` + +**DtypeWarning** { .api } +```python +class DtypeWarning(Warning) +``` +Warning issued when `read_csv` or `read_table` encounter mixed data types in columns, typically when processing large files in chunks. + +```python +# Mixed types in column trigger DtypeWarning +df = pd.DataFrame({'a': (['1'] * 100000 + ['X'] * 100000 + ['1'] * 100000)}) +df.to_csv('mixed_types.csv', index=False) +df2 = pd.read_csv('mixed_types.csv') # DtypeWarning: Columns (0) have mixed types +``` + +## Parsing and I/O Errors + +### File Reading Errors + +**ParserError** { .api } +```python +class ParserError(ValueError) +``` +Generic exception for parsing errors in file reading functions like `read_csv` and `read_html`. + +```python +# Malformed CSV data +data = '''a,b,c +cat,foo,bar +dog,foo,"baz''' +from io import StringIO +pd.read_csv(StringIO(data), skipfooter=1, engine='python') +# ParserError: ',' expected after '"' +``` + +**ParserWarning** { .api } +```python +class ParserWarning(Warning) +``` +Warning when pandas falls back from the 'c' parser to 'python' parser due to unsupported options. + +```python +# Using regex separator triggers ParserWarning +csv_data = '''a;b;c +1;1,8 +1;2,1''' +df = pd.read_csv(StringIO(csv_data), sep='[;,]') # ParserWarning: Falling back to 'python' engine +``` + +**EmptyDataError** { .api } +```python +class EmptyDataError(ValueError) +``` +Exception raised when `read_csv` encounters empty data or headers. + +```python +from io import StringIO +empty = StringIO() +pd.read_csv(empty) # EmptyDataError: No columns to parse from file +``` + +## Index and Data Structure Errors + +### Index Management Errors + +**UnsortedIndexError** { .api } +```python +class UnsortedIndexError(KeyError) +``` +Error when slicing a MultiIndex that hasn't been lexicographically sorted. 
+ +```python +# MultiIndex slicing without sorting +df = pd.DataFrame({"cat": [0, 0, 1, 1], "color": ["white", "white", "brown", "black"]}) +df = df.set_index(["cat", "color"]) +df.loc[(0, "black"):(1, "white")] # UnsortedIndexError: Key length was greater than lexsort depth +``` + +**InvalidIndexError** { .api } +```python +class InvalidIndexError(Exception) +``` +Exception for invalid index key usage, particularly with MultiIndex operations. + +```python +# Invalid MultiIndex access +idx = pd.MultiIndex.from_product([["x", "y"], [0, 1]]) +df = pd.DataFrame([[1, 1, 2, 2], [3, 3, 4, 4]], columns=idx) +df[:, 0] # InvalidIndexError: (slice(None, None, None), 0) +``` + +**IndexingError** { .api } +```python +class IndexingError(Exception) +``` +Exception for dimension mismatches and invalid indexing operations. + +```python +df = pd.DataFrame({'A': [1, 1, 1]}) +df.loc[..., ..., 'A'] # IndexingError: indexer may only contain one '...' entry +``` + +**DuplicateLabelError** { .api } +```python +class DuplicateLabelError(ValueError) +``` +Error when operations would introduce duplicate labels on objects with `allows_duplicate_labels=False`. + +```python +s = pd.Series([0, 1, 2], index=['a', 'b', 'c']).set_flags(allows_duplicate_labels=False) +s.reindex(['a', 'a', 'b']) # DuplicateLabelError: Index has duplicates +``` + +## Performance and Copy Warnings + +### Assignment and Copy Behavior + +**PerformanceWarning** { .api } +```python +class PerformanceWarning(Warning) +``` +Warning for operations that may impact performance, such as indexing past lexsort depth. + +```python +# MultiIndex performance warning +df = pd.DataFrame({"jim": [0, 0, 1, 1], "joe": ["x", "x", "z", "y"]}) +df = df.set_index(["jim", "joe"]) +df.loc[(1, 'z')] # PerformanceWarning: indexing past lexsort depth may impact performance +``` + +**SettingWithCopyWarning** { .api } +```python +class SettingWithCopyWarning(Warning) +``` +Warning when setting values on a copied slice from a DataFrame (chained assignment). + +```python +df = pd.DataFrame({'A': [1, 1, 1, 2, 2]}) +df.loc[0:3]['A'] = 'a' # SettingWithCopyWarning: A value is trying to be set on a copy +``` + +**SettingWithCopyError** { .api } +```python +class SettingWithCopyError(ValueError) +``` +Exception version of SettingWithCopyWarning when `mode.chained_assignment` is set to 'raise'. + +```python +pd.options.mode.chained_assignment = 'raise' +df = pd.DataFrame({'A': [1, 1, 1, 2, 2]}) +df.loc[0:3]['A'] = 'a' # SettingWithCopyError: A value is trying to be set on a copy +``` + +**ChainedAssignmentError** { .api } +```python +class ChainedAssignmentError(Warning) +``` +Warning for chained assignment when Copy-on-Write mode is enabled, indicating the assignment won't update the original object. + +```python +pd.options.mode.copy_on_write = True +df = pd.DataFrame({'A': [1, 1, 1, 2, 2]}) +df["A"][0:3] = 10 # ChainedAssignmentError: chained assignment never works with Copy-on-Write +``` + +## Computation and Analysis Errors + +### Data Operation Errors + +**DataError** { .api } +```python +class DataError(Exception) +``` +Exception for operations on non-numerical data where numerical data is required. + +```python +ser = pd.Series(['a', 'b', 'c']) +ser.rolling(2).sum() # DataError: No numeric types to aggregate +``` + +**SpecificationError** { .api } +```python +class SpecificationError(Exception) +``` +Exception raised by `agg()` when aggregation functions are incorrectly specified. 
+ +```python +df = pd.DataFrame({'A': [1, 1, 1, 2, 2], 'B': range(5)}) +df.groupby('A').B.agg({'foo': 'count'}) # SpecificationError: nested renamer is not supported +``` + +**MergeError** { .api } +```python +class MergeError(ValueError) +``` +Exception during DataFrame merge operations, particularly validation failures. + +```python +left = pd.DataFrame({"a": ["a", "b", "b", "d"], "b": ["cat", "dog", "weasel", "horse"]}) +right = pd.DataFrame({"a": ["a", "b", "c", "d"], "c": ["meow", "bark", "chirp", "nay"]}).set_index("a") +left.join(right, on="a", validate="one_to_one") # MergeError: Merge keys are not unique in left dataset +``` + +## Frequency and Time Series Errors + +### Temporal Data Errors + +**NullFrequencyError** { .api } +```python +class NullFrequencyError(ValueError) +``` +Exception when a frequency cannot be null for time series operations like `shift()`. + +```python +df = pd.DatetimeIndex(["2011-01-01 10:00", "2011-01-01"], freq=None) +df.shift(2) # NullFrequencyError: Cannot shift with no freq +``` + +**OutOfBoundsDatetime** { .api } +```python +class OutOfBoundsDatetime(ValueError) +``` +Exception for datetime values outside pandas' supported range (imported from pandas._libs.tslibs). + +**OutOfBoundsTimedelta** { .api } +```python +class OutOfBoundsTimedelta(ValueError) +``` +Exception for timedelta values outside pandas' supported range (imported from pandas._libs.tslibs). + +## Engine and Backend Errors + +### Computational Engine Errors + +**NumbaUtilError** { .api } +```python +class NumbaUtilError(Exception) +``` +Error for unsupported Numba engine routines in pandas operations. + +```python +df = pd.DataFrame({"key": ["a", "a", "b", "b"], "data": [1, 2, 3, 4]}) +def incorrect_function(x): + return sum(x) * 2.7 +df.groupby("key").agg(incorrect_function, engine="numba") # NumbaUtilError: first 2 arguments must be ['values', 'index'] +``` + +**NumExprClobberingError** { .api } +```python +class NumExprClobberingError(NameError) +``` +Exception when using built-in numexpr names as variable names in `eval()` or `query()`. + +```python +df = pd.DataFrame({'abs': [1, 1, 1]}) +df.query("abs > 2") # NumExprClobberingError: Variables overlap with builtins +``` + +**UndefinedVariableError** { .api } +```python +class UndefinedVariableError(NameError) +``` +Exception for undefined variable names in `query()` or `eval()` expressions. + +```python +df = pd.DataFrame({'A': [1, 1, 1]}) +df.query("A > x") # UndefinedVariableError: name 'x' is not defined +``` + +**UnsupportedFunctionCall** { .api } +```python +class UnsupportedFunctionCall(ValueError) +``` +Exception for calling unsupported NumPy functions on pandas objects. + +```python +df = pd.DataFrame({"A": [0, 0, 1, 1], "B": ["x", "x", "z", "y"]}) +import numpy as np +np.cumsum(df.groupby(["A"])) # UnsupportedFunctionCall: numpy operations not valid with groupby +``` + +## File Format Specific Errors + +### Database and Storage Errors + +**DatabaseError** { .api } +```python +class DatabaseError(OSError) +``` +Error when executing SQL with bad syntax or database errors. + +```python +from sqlite3 import connect +conn = connect(':memory:') +pd.read_sql('select * test', conn) # DatabaseError: Execution failed on sql +``` + +**PossibleDataLossError** { .api } +```python +class PossibleDataLossError(Exception) +``` +Exception when trying to open an HDFStore file that's already opened with a different mode. 
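+
+A minimal illustration (assuming the optional `tables` dependency is installed and a writable local file):
+
+```python
+store = pd.HDFStore("my-store.h5", "a")
+store.open("w")  # PossibleDataLossError: Re-opening the file [my-store.h5] with mode [a] will delete the current file!
+```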
+ +**ClosedFileError** { .api } +```python +class ClosedFileError(Exception) +``` +Exception when performing operations on a closed HDFStore file. + +### Clipboard and System Integration + +**PyperclipException** { .api } +```python +class PyperclipException(RuntimeError) +``` +Exception for unsupported clipboard functionality in `to_clipboard()` and `read_clipboard()`. + +**PyperclipWindowsException** { .api } +```python +class PyperclipWindowsException(PyperclipException) +``` +Windows-specific exception when clipboard access is denied due to other processes. + +## File Format Warnings + +### HDF5 and Storage Warnings + +**IncompatibilityWarning** { .api } +```python +class IncompatibilityWarning(Warning) +``` +Warning for incompatible HDF5 file operations with where criteria. + +**AttributeConflictWarning** { .api } +```python +class AttributeConflictWarning(Warning) +``` +Warning when index attributes conflict during HDFStore operations. + +### Stata File Warnings + +**PossiblePrecisionLoss** { .api } +```python +class PossiblePrecisionLoss(Warning) +``` +Warning when `to_stata()` converts int64 values to float64 due to range limitations. + +**ValueLabelTypeMismatch** { .api } +```python +class ValueLabelTypeMismatch(Warning) +``` +Warning when Stata export encounters non-string category values. + +**InvalidColumnName** { .api } +```python +class InvalidColumnName(Warning) +``` +Warning when column names are invalid Stata variables and need conversion. + +**CategoricalConversionWarning** { .api } +```python +class CategoricalConversionWarning(Warning) +``` +Warning when reading partially labeled Stata files with iterators. + +### Style and Formatting Warnings + +**CSSWarning** { .api } +```python +class CSSWarning(UserWarning) +``` +Warning when CSS styling conversion fails or encounters unhandled formats. + +```python +df = pd.DataFrame({'A': [1, 1, 1]}) +df.style.applymap(lambda x: 'background-color: blueGreenRed;').to_excel('styled.xlsx') +# CSSWarning: Unhandled color format: 'blueGreenRed' +``` + +## Development and Internal Errors + +### Abstract Method and Development Errors + +**AbstractMethodError** { .api } +```python +class AbstractMethodError(NotImplementedError) + def __init__(self, class_instance, methodtype: str = "method") -> None +``` +Error for abstract methods that must be implemented in concrete classes. Supports different method types: 'method', 'classmethod', 'staticmethod', 'property'. + +```python +class Foo: + @classmethod + def classmethod(cls): + raise pd.errors.AbstractMethodError(cls, methodtype="classmethod") + + def method(self): + raise pd.errors.AbstractMethodError(self) + +Foo.classmethod() # AbstractMethodError: This classmethod must be defined in the concrete class Foo +``` + +### Internal Implementation Errors + +**InvalidComparison** { .api } +```python +class InvalidComparison(Exception) +``` +Internal exception for invalid comparison operations (internal use only). + +**LossySetitemError** { .api } +```python +class LossySetitemError(Exception) +``` +Internal exception for non-lossless `__setitem__` operations on numpy arrays (internal use only). + +**NoBufferPresent** { .api } +```python +class NoBufferPresent(Exception) +``` +Internal exception signaling absence of requested buffer in `_get_data_buffer` (internal use only). 
+ +## Configuration Errors + +### Options and Configuration + +**OptionError** { .api } +```python +class OptionError(AttributeError) +``` +Exception for pandas configuration option errors (imported from pandas._config.config). + +**InvalidVersion** { .api } +```python +class InvalidVersion(ValueError) +``` +Exception for invalid version strings (imported from pandas.util.version). + +## Type Definitions + +```python +# Error Categories +DataTypeError = Union[IntCastingNaNError, DtypeWarning] +ParsingError = Union[ParserError, ParserWarning, EmptyDataError] +IndexError = Union[UnsortedIndexError, InvalidIndexError, IndexingError, DuplicateLabelError] +CopyWarning = Union[PerformanceWarning, SettingWithCopyWarning, SettingWithCopyError, ChainedAssignmentError] +ComputationError = Union[DataError, SpecificationError, MergeError] +TimeSeriesError = Union[NullFrequencyError, OutOfBoundsDatetime, OutOfBoundsTimedelta] +EngineError = Union[NumbaUtilError, NumExprClobberingError, UndefinedVariableError, UnsupportedFunctionCall] +FileFormatError = Union[DatabaseError, PossibleDataLossError, ClosedFileError, PyperclipException] +FormatWarning = Union[IncompatibilityWarning, AttributeConflictWarning, PossiblePrecisionLoss, ValueLabelTypeMismatch, InvalidColumnName, CategoricalConversionWarning, CSSWarning] +DevelopmentError = Union[AbstractMethodError, InvalidComparison, LossySetitemError, NoBufferPresent] +ConfigurationError = Union[OptionError, InvalidVersion] + +# All pandas errors and warnings +PandasError = Union[ + DataTypeError, ParsingError, IndexError, CopyWarning, ComputationError, + TimeSeriesError, EngineError, FileFormatError, FormatWarning, + DevelopmentError, ConfigurationError +] +``` + +The pandas.errors module provides comprehensive error handling for all aspects of pandas operations, from basic data type conversions to complex multi-index operations and file I/O. Understanding these error types helps in writing robust pandas applications with proper exception handling and performance optimization. \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-pandas/docs/index.md b/.tessl/tiles/tessl/pypi-pandas/docs/index.md new file mode 100644 index 0000000..499a357 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-pandas/docs/index.md @@ -0,0 +1,238 @@ +# Pandas + +Pandas is a comprehensive Python data analysis library that provides powerful, flexible, and expressive data structures designed for working with structured and time series data. It offers extensive functionality for data manipulation, cleaning, transformation, and analysis including data alignment, merging, reshaping, grouping, and statistical operations. 
+ +## Package Information + +- **Package Name**: pandas +- **Package Type**: library +- **Language**: Python +- **Installation**: `pip install pandas` + +## Core Imports + +```python +import pandas as pd +``` + +Common imports for specific functionality: + +```python +import pandas as pd +from pandas import DataFrame, Series, Index +``` + +## Basic Usage + +```python +import pandas as pd +import numpy as np + +# Create a DataFrame from dictionary +data = { + 'name': ['Alice', 'Bob', 'Charlie', 'Diana'], + 'age': [25, 30, 35, 28], + 'city': ['New York', 'London', 'Tokyo', 'Paris'], + 'salary': [50000, 60000, 70000, 55000] +} +df = pd.DataFrame(data) + +# Basic operations +print(df.head()) # Display first 5 rows +print(df.info()) # Display DataFrame info +print(df.describe()) # Statistical summary + +# Data selection and filtering +young_employees = df[df['age'] < 30] +high_earners = df[df['salary'] > 55000] + +# Create a Series +ages = pd.Series([25, 30, 35, 28], name='ages') +print(ages.mean()) # Calculate mean age + +# Read data from files +df_csv = pd.read_csv('data.csv') +df_excel = pd.read_excel('data.xlsx') + +# Basic data manipulation +df['bonus'] = df['salary'] * 0.1 # Add new column +df_sorted = df.sort_values('salary') # Sort by salary +df_grouped = df.groupby('city')['salary'].mean() # Group and aggregate +``` + +## Architecture + +Pandas is built around three fundamental data structures: + +- **Series**: One-dimensional labeled array capable of holding any data type +- **DataFrame**: Two-dimensional labeled data structure with heterogeneous columns +- **Index**: Immutable sequence used for indexing and alignment + +The library integrates seamlessly with NumPy, providing optimized performance through vectorized operations, and serves as the foundation for the Python data science ecosystem, including integration with Jupyter notebooks, matplotlib, scikit-learn, and hundreds of domain-specific analysis libraries. + +## Capabilities + +### Core Data Structures + +The fundamental data structures that form the foundation of pandas: DataFrame, Series, and various Index types. These structures provide the building blocks for all data manipulation operations. + +```python { .api } +class DataFrame: + def __init__(self, data=None, index=None, columns=None, dtype=None, copy=None): ... + +class Series: + def __init__(self, data=None, index=None, dtype=None, name=None, copy=None, fastpath=False): ... + +class Index: + def __init__(self, data=None, dtype=None, copy=False, name=None, tupleize_cols=True): ... +``` + +[Core Data Structures](./core-data-structures.md) + +### Data Input/Output + +Comprehensive I/O capabilities for reading and writing data in various formats including CSV, Excel, JSON, SQL databases, HDF5, Parquet, and many statistical file formats. + +```python { .api } +def read_csv(filepath_or_buffer, **kwargs): ... +def read_excel(io, **kwargs): ... +def read_json(path_or_buf, **kwargs): ... +def read_sql(sql, con, **kwargs): ... +def read_parquet(path, **kwargs): ... +``` + +[Data Input/Output](./data-io.md) + +### Data Manipulation and Reshaping + +Functions for combining, reshaping, and transforming data including merging, concatenation, pivoting, melting, and advanced data restructuring operations. + +```python { .api } +def concat(objs, axis=0, join='outer', **kwargs): ... +def merge(left, right, how='inner', on=None, **kwargs): ... +def pivot_table(data, values=None, index=None, columns=None, **kwargs): ... +def melt(data, id_vars=None, value_vars=None, **kwargs): ... 
+``` + +[Data Manipulation](./data-manipulation.md) + +### Time Series and Date Handling + +Comprehensive time series functionality including date/time parsing, time zone handling, frequency conversion, resampling, and specialized time-based operations. + +```python { .api } +def date_range(start=None, end=None, periods=None, freq=None, **kwargs): ... +def to_datetime(arg, **kwargs): ... +class Timestamp: + def __init__(self, ts_input=None, freq=None, tz=None, **kwargs): ... +``` + +[Time Series](./time-series.md) + +### Data Types and Missing Data + +Extension data types, missing data handling, and type conversion utilities including nullable integer/boolean types, categorical data, and advanced missing value operations. + +```python { .api } +def isna(obj): ... +def notna(obj): ... +class Categorical: + def __init__(self, values, categories=None, ordered=None, dtype=None, fastpath=False): ... +``` + +[Data Types](./data-types.md) + +### Statistical and Mathematical Operations + +Built-in statistical functions, mathematical operations, and data analysis utilities including descriptive statistics, correlation analysis, and numerical computations. + +```python { .api } +def cut(x, bins, **kwargs): ... +def qcut(x, q, **kwargs): ... +def factorize(values, **kwargs): ... +def value_counts(values, **kwargs): ... +``` + +[Statistics and Math](./statistics-math.md) + +### Configuration and Options + +Pandas configuration system for controlling display options, computational behavior, and library-wide settings. + +```python { .api } +def get_option(pat): ... +def set_option(pat, value): ... +def reset_option(pat): ... +def option_context(*args): ... +``` + +[Configuration](./configuration.md) + +### Plotting and Visualization + +Comprehensive plotting capabilities including basic plot types, statistical visualizations, and advanced multivariate analysis plots built on matplotlib. + +```python { .api } +def scatter_matrix(frame, **kwargs): ... +def parallel_coordinates(frame, class_column, **kwargs): ... +def andrews_curves(frame, class_column, **kwargs): ... +def radviz(frame, class_column, **kwargs): ... +``` + +[Plotting](./plotting.md) + +### API and Type Checking + +Type checking utilities and data type validation functions for working with pandas data structures and ensuring data quality. + +```python { .api } +def is_numeric_dtype(arr_or_dtype): ... +def is_datetime64_dtype(arr_or_dtype): ... +def is_categorical_dtype(arr_or_dtype): ... +def infer_dtype(value, **kwargs): ... +``` + +[API Types](./api-types.md) + +### Error Handling + +Exception and warning classes for proper error handling in pandas applications, including parsing errors, performance warnings, and data validation errors. + +```python { .api } +class ParserError(ValueError): ... +class PerformanceWarning(Warning): ... +class SettingWithCopyWarning(Warning): ... +class DtypeWarning(Warning): ... 
+``` + +[Errors](./errors.md) + +## Types + +```python { .api } +# Core scalar types +class Timestamp: + """Pandas timestamp object.""" + pass + +class Timedelta: + """Pandas timedelta object.""" + pass + +class Period: + """Pandas period object.""" + pass + +class Interval: + """Pandas interval object.""" + pass + +# Missing value sentinels +NA: object # Pandas missing value sentinel +NaT: object # Not-a-Time for datetime/timedelta + +# Common type aliases +Scalar = Union[str, int, float, bool, Timestamp, Timedelta, Period, Interval] +ArrayLike = Union[list, tuple, np.ndarray, Series, Index] +Axes = Union[int, str, Sequence[Union[int, str]]] +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-pandas/docs/plotting.md b/.tessl/tiles/tessl/pypi-pandas/docs/plotting.md new file mode 100644 index 0000000..a0450a7 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-pandas/docs/plotting.md @@ -0,0 +1,478 @@ +# Data Visualization and Plotting + +Comprehensive plotting and visualization capabilities using matplotlib backend, including statistical plots, multivariate visualizations, and DataFrame/Series plotting methods for creating publication-ready charts and graphs. + +## Core Imports + +```python +import pandas as pd +from pandas import plotting +from pandas.plotting import ( + scatter_matrix, radviz, andrews_curves, parallel_coordinates, + bootstrap_plot, lag_plot, autocorrelation_plot, table +) +``` + +## Capabilities + +### DataFrame and Series Plotting Methods + +Primary plotting interface available through `.plot` accessor on DataFrame and Series objects. + +```python { .api } +class PlotAccessor: + """ + Make plots of Series or DataFrame using matplotlib backend. + + Parameters: + - data: Series or DataFrame, the object for which the method is called + - x: label or position, only used if data is a DataFrame + - y: label, position or list of positions, allows plotting of one column versus another + - kind: str, the kind of plot to produce + - ax: matplotlib axes object, axes of the current figure + - subplots: bool or sequence, whether to group columns into subplots + - sharex: bool, share x axis in case subplots=True + - sharey: bool, share y axis in case subplots=True + - layout: tuple (rows, cols), layout of subplots + - figsize: tuple (width, height), figure size in inches + - use_index: bool, use index as ticks for x axis + - title: str or list, title to use for the plot + - grid: bool, axis grid lines + - legend: bool or {'reverse'}, place legend on axis subplots + - style: list or dict, matplotlib line style per column + - logx: bool, use log scaling on x axis + - logy: bool, use log scaling on y axis + - loglog: bool, use log scaling on both x and y axes + - xticks: sequence, values to use for the xticks + - yticks: sequence, values to use for the yticks + - xlim: 2-tuple/list, set the x limits of the current axes + - ylim: 2-tuple/list, set the y limits of the current axes + - rot: int, rotation for ticks + - fontsize: int, font size for xticks and yticks + - colormap: str or matplotlib colormap, colormap to select colors from + - colorbar: bool, if True, plot colorbar (only relevant for scatter and hexbin plots) + - position: float, specify relative alignments for bar plot layout + - table: bool, Series or DataFrame, or True for drawing a table + - yerr: DataFrame, Series, array-like, dict, or str, equivalent to xerr + - xerr: DataFrame, Series, array-like, dict, or str, equivalent to yerr + - stacked: bool, in line and bar plots, if True, create stacked plot + - 
sort_columns: bool, sort column names to determine plot ordering + - secondary_y: bool or sequence, whether to plot on secondary y-axis + - mark_right: bool, when using secondary_y axis, mark the column labels + - include_bool: bool, if True, boolean values can be plotted + - backend: str, backend to use instead of the backend specified in the option + """ + + def __call__(self, x=None, y=None, kind='line', ax=None, subplots=False, sharex=None, sharey=False, layout=None, figsize=None, use_index=True, title=None, grid=None, legend=True, style=None, logx=False, logy=False, loglog=False, xticks=None, yticks=None, xlim=None, ylim=None, rot=None, fontsize=None, colormap=None, colorbar=None, position=0.5, table=False, yerr=None, xerr=None, stacked=False, sort_columns=False, secondary_y=False, mark_right=True, include_bool=False, backend=None, **kwargs): + """Create a plot with various visualization types.""" + + def line(self, x=None, y=None, **kwargs): + """Plot Series or DataFrame as lines.""" + + def bar(self, x=None, y=None, **kwargs): + """Make a vertical bar plot.""" + + def barh(self, x=None, y=None, **kwargs): + """Make a horizontal bar plot.""" + + def box(self, by=None, **kwargs): + """Make a box plot of the DataFrame columns.""" + + def hist(self, by=None, bins=10, **kwargs): + """Draw one histogram of the DataFrame's columns.""" + + def kde(self, bw_method=None, ind=None, **kwargs): + """Generate Kernel Density Estimate plot using Gaussian kernels.""" + + def density(self, bw_method=None, ind=None, **kwargs): + """Generate Kernel Density Estimate plot using Gaussian kernels (alias for kde).""" + + def area(self, x=None, y=None, stacked=True, **kwargs): + """Draw a stacked area plot.""" + + def pie(self, y=None, **kwargs): + """Generate a pie plot.""" + + def scatter(self, x, y, s=None, c=None, **kwargs): + """Create a scatter plot with varying marker point size and color.""" + + def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, **kwargs): + """Generate a hexagonal binning plot.""" +``` + +### Statistical Distribution Plots + +Functions for creating histograms and box plots from DataFrame and Series data. + +```python { .api } +def hist_series(self, by=None, ax=None, grid=True, xlabelsize=None, xrot=None, ylabelsize=None, yrot=None, figsize=None, bins=10, backend=None, legend=False, **kwargs): + """ + Draw histogram of the input series using matplotlib. + + Parameters: + - by: object, if passed, used to form histograms for separate groups + - ax: matplotlib axis object, if not passed, uses gca() + - grid: bool, whether to show axis grid lines + - xlabelsize: int, if specified changes the x-axis label size + - xrot: float, rotation of x axis labels + - ylabelsize: int, if specified changes the y-axis label size + - yrot: float, rotation of y axis labels + - figsize: tuple, figure size in inches by default + - bins: int or sequence, number of histogram bins to be used + - backend: str, backend to use instead of the backend specified in the option + - legend: bool, whether to show the legend + + Returns: + matplotlib.axes.Axes or numpy.ndarray of them + """ + +def hist_frame(data, column=None, by=None, grid=True, xlabelsize=None, xrot=None, ylabelsize=None, yrot=None, ax=None, sharex=False, sharey=False, figsize=None, layout=None, bins=10, backend=None, legend=False, **kwargs): + """ + Make a histogram of the DataFrame's columns. 
+ + Parameters: + - data: DataFrame, the pandas object holding the data + - column: str or sequence, if passed, will be used to limit data to a subset of columns + - by: object, if passed, then used to form histograms for separate groups + - grid: bool, whether to show axis grid lines + - xlabelsize: int, if specified changes the x-axis label size + - xrot: float, rotation of x axis labels + - ylabelsize: int, if specified changes the y-axis label size + - yrot: float, rotation of y axis labels + - ax: matplotlib axes object, if not passed, uses gca() + - sharex: bool, in case subplots=True, share x axis and set some x axis labels to invisible + - sharey: bool, in case subplots=True, share y axis and set some y axis labels to invisible + - figsize: tuple, figure size in inches by default + - layout: tuple, (rows, columns) for the layout of the histograms + - bins: int or sequence, number of histogram bins to be used + - backend: str, backend to use for plotting + - legend: bool, whether to show the legend + + Returns: + matplotlib.axes.Axes or numpy.ndarray of them + """ + +def boxplot(data, column=None, by=None, ax=None, fontsize=None, rot=0, grid=True, figsize=None, layout=None, return_type=None, **kwargs): + """ + Make a box plot from DataFrame columns. + + Parameters: + - data: DataFrame, the pandas object holding the data + - column: str or list of str, column name or list of names, or vector + - by: str or list of str, column in the DataFrame to group by + - ax: matplotlib axes object, axes object to draw the plot onto + - fontsize: float or str, tick label font size in points or as a string + - rot: int, rotation angle of labels (in degrees) + - grid: bool, setting this to True will show the grid + - figsize: tuple, a tuple (width, height) in inches + - layout: tuple, (rows, columns) for the layout of the plot + - return_type: str, the kind of object to return + + Returns: + result : varies based on return_type parameter + """ + +def boxplot_frame(self, column=None, by=None, ax=None, fontsize=None, rot=0, grid=True, figsize=None, layout=None, return_type=None, backend=None, **kwargs): + """ + Make a box plot of the DataFrame columns. + + Parameters: + - column: str or list of str, column name or sequence + - by: str or array-like, column in the DataFrame to group by + - ax: matplotlib axes object, axes object to draw the plot onto + - fontsize: float or str, tick label font size + - rot: int, rotation angle of labels + - grid: bool, setting this to True will show the grid + - figsize: tuple, figure size in inches + - layout: tuple, (rows, columns) for the layout of the plot + - return_type: str, the kind of object to return + - backend: str, backend to use for plotting + + Returns: + result : varies based on return_type parameter + """ + +def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None, rot=0, grid=True, ax=None, figsize=None, layout=None, sharex=False, sharey=True, backend=None, **kwargs): + """ + Make box plots from DataFrameGroupBy data. 
+ + Parameters: + - grouped: Grouped DataFrame + - subplots: bool, False - no subplots will be used, True - create a subplot for each group + - column: column name or list of names, or vector + - fontsize: float or str, tick label font size + - rot: int, label rotation angle + - grid: bool, setting this to True will show the grid + - ax: matplotlib axis object + - figsize: tuple, figure size in inches + - layout: tuple, (rows, columns) for the layout of subplots + - sharex: bool, whether to share the x axis between subplots + - sharey: bool, whether to share the y axis between subplots + - backend: str, backend to use for plotting + + Returns: + matplotlib.axes.Axes or numpy.ndarray of them + """ +``` + +### Multivariate Analysis Plots + +Advanced plotting functions for exploring relationships between multiple variables. + +```python { .api } +def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, diagonal='hist', marker='.', density_kwds=None, hist_kwds=None, range_padding=0.05, **kwargs): + """ + Draw a matrix of scatter plots. + + Parameters: + - frame: DataFrame, data for scatter matrix + - alpha: float, amount of transparency applied + - figsize: tuple (width, height), figure size in inches + - ax: matplotlib axis object + - grid: bool, setting this to True will show the grid + - diagonal: str {'hist', 'kde'}, pick between 'kde' and 'hist' for diagonal plots + - marker: str, matplotlib marker type + - density_kwds: dict, keyword arguments to be passed to kernel density estimate plot + - hist_kwds: dict, keyword arguments to be passed to hist function + - range_padding: float, relative extension of axis range + + Returns: + numpy.ndarray: A matrix of scatter plots + """ + +def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds): + """ + Plot a multidimensional dataset in 2D. + + Each Series in the DataFrame is represented as a evenly distributed + slice on a circle. RadViz allows projection of N-dimensional data set into 2D space. + + Parameters: + - frame: DataFrame, object holding the data + - class_column: str, column name containing the name of the data point category + - ax: matplotlib.axes.Axes, a plot instance to which to add the information + - color: list or tuple of str, assign a color to each category + - colormap: str or matplotlib.colors.Colormap, colormap to select colors from + + Returns: + matplotlib.axes.Axes + """ + +def andrews_curves(frame, class_column, ax=None, samples=200, color=None, colormap=None, **kwargs): + """ + Generate a matplotlib plot for visualizing clusters of multivariate data. + + Andrews curves have the functional form: + f(t) = x_1/sqrt(2) + x_2*sin(t) + x_3*cos(t) + x_4*sin(2t) + x_5*cos(2t) + ... + + Parameters: + - frame: DataFrame, data to be plotted, preferably normalized to (0.0, 1.0) + - class_column: str, name of the column containing class names + - ax: matplotlib axes object, axes to use + - samples: int, number of points to plot in each curve + - color: list or tuple of str, colors to use for the different classes + - colormap: str or matplotlib colormap object, colormap to select colors from + + Returns: + matplotlib.axes.Axes + """ + +def parallel_coordinates(frame, class_column, cols=None, ax=None, color=None, use_columns=False, xticks=None, colormap=None, axvlines=True, axvlines_kwds=None, sort_labels=False, **kwargs): + """ + Parallel coordinates plotting. 
+ + Parameters: + - frame: DataFrame, data for parallel coordinates plot + - class_column: str, column name containing class names + - cols: list, column names to use + - ax: matplotlib axis object + - color: list or tuple, colors to use for the different classes + - use_columns: bool, if true, columns will be used as xticks + - xticks: list or tuple, values to use for xticks + - colormap: str or matplotlib colormap, colormap to use for line colors + - axvlines: bool, if true, vertical lines will be added at each xtick + - axvlines_kwds: dict, options to be passed to axvline method for vertical lines + - sort_labels: bool, sort class_column labels + + Returns: + matplotlib.axes.Axes + """ +``` + +### Time Series Visualization + +Specialized plots for analyzing temporal patterns and relationships in time series data. + +```python { .api } +def lag_plot(series, lag=1, ax=None, **kwds): + """ + Lag plot for time series. + + Parameters: + - series: Series, the time series to visualize + - lag: int, lag length of the scatter plot + - ax: matplotlib axis object, the matplotlib axis object to use + + Returns: + matplotlib.axes.Axes + """ + +def autocorrelation_plot(series, ax=None, **kwargs): + """ + Autocorrelation plot for time series. + + The horizontal lines in the plot correspond to 95% and 99% confidence bands. + The dashed line is 99% confidence band. + + Parameters: + - series: Series, the time series to visualize + - ax: matplotlib axis object, the matplotlib axis object to use + + Returns: + matplotlib.axes.Axes + """ +``` + +### Statistical Bootstrap Analysis + +Bootstrap resampling visualization for uncertainty estimation. + +```python { .api } +def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds): + """ + Bootstrap plot on mean, median and mid-range statistics. + + The bootstrap plot is used to estimate the uncertainty of a statistic + by relying on random sampling with replacement. + + Parameters: + - series: Series, series from where to get the samplings for the bootstrapping + - fig: matplotlib.figure.Figure, if given, it will use the fig reference for plotting + - size: int, number of data points to consider during each sampling + - samples: int, number of times the bootstrap procedure is performed + + Returns: + matplotlib.figure.Figure + """ +``` + +### Table Display and Matplotlib Integration + +Display utilities and matplotlib converter management. + +```python { .api } +def table(ax, data, **kwargs): + """ + Helper function to convert DataFrame and Series to matplotlib.table. + + Parameters: + - ax: matplotlib axes object, axes to draw table on + - data: DataFrame or Series, data for table contents + - **kwargs: keyword arguments passed to matplotlib.table.table + + Returns: + matplotlib.table.Table: matplotlib table object + """ + +def register_matplotlib_converters(): + """ + Register pandas formatters and converters with matplotlib. + + This function modifies the global matplotlib.units.registry dictionary. + Pandas adds custom converters for pd.Timestamp, pd.Period, np.datetime64, + datetime.datetime, datetime.date, and datetime.time. + """ + +def deregister_matplotlib_converters(): + """ + Remove pandas formatters and converters. + + Removes the custom converters added by register_matplotlib_converters. + This attempts to set the state of the registry back to the state before + pandas registered its own units. + """ +``` + +### Plot Configuration and Styling + +Configuration options and styling utilities for customizing plot appearance. 
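+
+As a brief sketch of the option store documented below, `plot_params.use` temporarily overrides a plotting option for the duration of a `with` block (here the `x_compat` key, which switches x-axis date formatting back to matplotlib's native behavior):
+
+```python
+import numpy as np
+import pandas as pd
+
+df = pd.DataFrame(np.random.randn(100, 2),
+                  index=pd.date_range("2024-01-01", periods=100))
+
+with pd.plotting.plot_params.use("x_compat", True):
+    df.plot()  # the option applies only inside this block
+```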
+ +```python { .api } +class _Options(dict): + """ + Stores pandas plotting options. + + Allows for parameter aliasing so you can use parameter names that are + the same as the plot function parameters, stored in canonical format. + """ + + def use(self, key, value): + """ + Temporarily set a parameter value using the with statement. + + Parameters: + - key: str, parameter name (aliasing allowed) + - value: any, parameter value to set temporarily + + Returns: + context manager for temporary parameter setting + """ + + def reset(self): + """Reset the option store to its initial state.""" + +# Global plot parameters object +plot_params = _Options() +``` + +## Types + +```python { .api } +# Plot kind constants +class PlotKind: + LINE = 'line' + BAR = 'bar' + BARH = 'barh' + HIST = 'hist' + BOX = 'box' + KDE = 'kde' + DENSITY = 'density' + AREA = 'area' + PIE = 'pie' + SCATTER = 'scatter' + HEXBIN = 'hexbin' + +# Diagonal plot options for scatter_matrix +class DiagonalKind: + HIST = 'hist' + KDE = 'kde' + +# Plot accessor class +class PlotAccessor: + """Plotting accessor for Series and DataFrame objects.""" + def __init__(self, data): ... + def __call__(self, *args, **kwargs): ... + + # Individual plot methods + line: Callable + bar: Callable + barh: Callable + box: Callable + hist: Callable + kde: Callable + density: Callable + area: Callable + pie: Callable + scatter: Callable # DataFrame only + hexbin: Callable # DataFrame only + +# Matplotlib integration types +from matplotlib.axes import Axes +from matplotlib.figure import Figure +from matplotlib.table import Table +from matplotlib.colors import Colormap +import numpy as np + +# Return types for plotting functions +PlotResult = Axes | np.ndarray | Figure | Table +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-pandas/docs/statistics-math.md b/.tessl/tiles/tessl/pypi-pandas/docs/statistics-math.md new file mode 100644 index 0000000..418e2d4 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-pandas/docs/statistics-math.md @@ -0,0 +1,608 @@ +# Statistical and Mathematical Operations + +Built-in statistical functions, mathematical operations, and data analysis utilities including descriptive statistics, correlation analysis, and numerical computations. + +## Core Imports + +```python +import pandas as pd +from pandas import cut, qcut, factorize, value_counts +``` + +## Capabilities + +### Descriptive Statistics + +Core statistical functions available on DataFrame and Series objects. 
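+
+For orientation, a short usage sketch on a small, made-up DataFrame (column names are illustrative only); the full method signatures follow:
+
+```python
+import pandas as pd
+
+df = pd.DataFrame({"height": [1.62, 1.75, 1.80, 1.68],
+                   "weight": [58, 72, 81, 64]})
+
+df.mean()            # column means
+df["weight"].std()   # sample standard deviation of a single column
+df.describe()        # count, mean, std, min, quartiles, max per numeric column
+df.quantile(0.9)     # 90th percentile of each numeric column
+```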
+ +```python { .api } +# These are methods available on DataFrame and Series: + +# Central tendency +def mean(axis=None, skipna=True, level=None, numeric_only=None): + """Return the mean of the values over the requested axis.""" + +def median(axis=None, skipna=True, level=None, numeric_only=None): + """Return the median of the values over the requested axis.""" + +def mode(axis=0, numeric_only=False, dropna=True): + """Return the mode(s) of each element along the selected axis.""" + +# Measures of spread +def std(axis=None, skipna=True, level=None, ddof=1, numeric_only=None): + """Return sample standard deviation over requested axis.""" + +def var(axis=None, skipna=True, level=None, ddof=1, numeric_only=None): + """Return unbiased variance over requested axis.""" + +def sem(axis=None, skipna=True, level=None, ddof=1, numeric_only=None): + """Return unbiased standard error of the mean over requested axis.""" + +def mad(axis=None, skipna=True, level=None): + """Return the mean absolute deviation of the values over the requested axis.""" + +# Distribution shape +def skew(axis=None, skipna=True, level=None, numeric_only=None): + """Return unbiased skew over requested axis.""" + +def kurt(axis=None, skipna=True, level=None, numeric_only=None): + """Return unbiased kurtosis over requested axis.""" + +def kurtosis(axis=None, skipna=True, level=None, numeric_only=None): + """Return unbiased kurtosis over requested axis (alias for kurt).""" + +# Extremes +def min(axis=None, skipna=True, level=None, numeric_only=None): + """Return the minimum of the values over the requested axis.""" + +def max(axis=None, skipna=True, level=None, numeric_only=None): + """Return the maximum of the values over the requested axis.""" + +def idxmin(axis=0, skipna=True): + """Return index of first occurrence of minimum over requested axis.""" + +def idxmax(axis=0, skipna=True): + """Return index of first occurrence of maximum over requested axis.""" + +# Aggregation +def sum(axis=None, skipna=True, level=None, numeric_only=None, min_count=0): + """Return the sum of the values over the requested axis.""" + +def prod(axis=None, skipna=True, level=None, numeric_only=None, min_count=0): + """Return the product of the values over the requested axis.""" + +def product(axis=None, skipna=True, level=None, numeric_only=None, min_count=0): + """Return the product of the values over the requested axis (alias for prod).""" + +def count(axis=0, level=None, numeric_only=False): + """Count non-NA cells for each column or row.""" + +def nunique(axis=0, dropna=True): + """Count number of distinct elements in specified axis.""" + +# Quantiles and percentiles +def quantile(q=0.5, axis=0, numeric_only=True, interpolation='linear', method='single'): + """Return values at the given quantile over requested axis.""" + +def describe(percentiles=None, include=None, exclude=None): + """Generate descriptive statistics.""" + +# Cumulative operations +def cumsum(axis=None, skipna=True): + """Return cumulative sum over a DataFrame or Series axis.""" + +def cumprod(axis=None, skipna=True): + """Return cumulative product over a DataFrame or Series axis.""" + +def cummax(axis=None, skipna=True): + """Return cumulative maximum over a DataFrame or Series axis.""" + +def cummin(axis=None, skipna=True): + """Return cumulative minimum over a DataFrame or Series axis.""" +``` + +### Correlation and Covariance + +Functions to compute relationships between variables. 
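+
+A short sketch on synthetic, seeded data showing how the three methods below relate:
+
+```python
+import numpy as np
+import pandas as pd
+
+rng = np.random.default_rng(0)
+df = pd.DataFrame({"x": rng.normal(size=100)})
+df["y"] = 2 * df["x"] + rng.normal(scale=0.1, size=100)
+
+df.corr()                   # pairwise Pearson correlation matrix
+df.corr(method="spearman")  # rank-based correlation
+df.cov()                    # pairwise covariance matrix
+df["x"].corr(df["y"])       # correlation between two Series
+```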
+ +```python { .api } +# These are methods available on DataFrame and Series: + +def corr(method='pearson', min_periods=1, numeric_only=True): + """ + Compute pairwise correlation of columns. + + Parameters: + - method: str, correlation method ('pearson', 'kendall', 'spearman') + - min_periods: int, minimum number of observations for valid result + - numeric_only: bool, include only numeric columns + + Returns: + DataFrame, correlation matrix + """ + +def cov(min_periods=None, ddof=1, numeric_only=True): + """ + Compute pairwise covariance of columns. + + Parameters: + - min_periods: int, minimum number of observations for valid result + - ddof: int, delta degrees of freedom + - numeric_only: bool, include only numeric columns + + Returns: + DataFrame, covariance matrix + """ + +def corrwith(other, axis=0, drop=False, method='pearson', numeric_only=True): + """ + Compute pairwise correlation. + + Parameters: + - other: DataFrame, Series, or array-like + - axis: int, axis to use (0 or 1) + - drop: bool, drop missing indices from result + - method: str, correlation method ('pearson', 'kendall', 'spearman') + - numeric_only: bool, include only numeric columns + + Returns: + Series, correlations + """ +``` + +### Mathematical Operations + +Element-wise mathematical functions and operations. + +```python { .api } +# These are methods available on DataFrame and Series: + +def abs(): + """Return a Series/DataFrame with absolute numeric value of each element.""" + +def round(decimals=0): + """Round each value to the given number of decimals.""" + +def clip(lower=None, upper=None, axis=None, inplace=False): + """Trim values at input threshold(s).""" + +def rank(axis=0, method='average', numeric_only=None, na_option='keep', ascending=True, pct=False): + """ + Compute numerical data ranks along axis. + + Parameters: + - axis: int, axis to rank along + - method: str, how to rank ('average', 'min', 'max', 'first', 'dense') + - numeric_only: bool, include only numeric columns + - na_option: str, how to rank NaN values ('keep', 'top', 'bottom') + - ascending: bool, rank in ascending order + - pct: bool, return percentile rank + + Returns: + same type as caller, data ranks + """ + +# Exponential and logarithmic functions (available via NumPy integration) +def exp(): + """Calculate exponential of elements.""" + +def log(): + """Calculate natural logarithm of elements.""" + +def log10(): + """Calculate base-10 logarithm of elements.""" + +def log2(): + """Calculate base-2 logarithm of elements.""" + +def sqrt(): + """Calculate square root of elements.""" + +def pow(other): + """Calculate exponential power of elements.""" + +# Trigonometric functions (available via NumPy integration) +def sin(): + """Calculate sine of elements.""" + +def cos(): + """Calculate cosine of elements.""" + +def tan(): + """Calculate tangent of elements.""" + +def arcsin(): + """Calculate inverse sine of elements.""" + +def arccos(): + """Calculate inverse cosine of elements.""" + +def arctan(): + """Calculate inverse tangent of elements.""" +``` + +### Comparison Operations + +Functions for comparing and ranking data. 
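+
+A quick sketch of the element-wise comparison helpers documented below, using small illustrative data:
+
+```python
+import pandas as pd
+
+s = pd.Series([3, 7, 12, 5])
+s.gt(6)            # element-wise "greater than", equivalent to s > 6
+s.between(4, 10)   # True where 4 <= value <= 10 (both ends inclusive by default)
+s.isin([3, 5])     # membership test against a list of values
+
+df = pd.DataFrame({"a": [1, 2, 3], "b": [3, 2, 1]})
+df.eq(df["b"], axis=0)  # compare each column to column "b", aligning on the row index
+```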
+ +```python { .api } +# These are methods available on DataFrame and Series: + +def eq(other, axis='columns', level=None): + """Get equal to of dataframe and other, element-wise (binary operator ==).""" + +def ne(other, axis='columns', level=None): + """Get not equal to of dataframe and other, element-wise (binary operator !=).""" + +def lt(other, axis='columns', level=None): + """Get less than of dataframe and other, element-wise (binary operator <).""" + +def le(other, axis='columns', level=None): + """Get less than or equal to of dataframe and other, element-wise (binary operator <=).""" + +def gt(other, axis='columns', level=None): + """Get greater than of dataframe and other, element-wise (binary operator >).""" + +def ge(other, axis='columns', level=None): + """Get greater than or equal to of dataframe and other, element-wise (binary operator >=).""" + +def between(left, right, inclusive='both'): + """ + Return boolean Series equivalent to left <= series <= right. + + Parameters: + - left: scalar or list-like, left boundary + - right: scalar or list-like, right boundary + - inclusive: str, include boundaries ('both', 'neither', 'left', 'right') + + Returns: + Series, boolean values + """ + +def isin(values): + """ + Whether each element in the Series/DataFrame is contained in values. + + Parameters: + - values: set or list-like, sequence of values to test + + Returns: + Series/DataFrame of bools, boolean values + """ +``` + +### Top-Level Statistical Functions + +Standalone statistical functions that operate on array-like data. + +```python { .api } +def cut(x, bins, right=True, labels=None, retbins=False, precision=3, include_lowest=False, duplicates='raise', ordered=True): + """ + Bin values into discrete intervals. + + Parameters: + - x: array-like, input array to be binned + - bins: int, sequence of scalars, or IntervalIndex + - right: bool, whether bins include rightmost edge + - labels: array or bool, labels for returned bins + - retbins: bool, return bins + - precision: int, precision for bin labels + - include_lowest: bool, whether first interval is left-inclusive + - duplicates: str, behavior for non-unique bin edges ('raise' or 'drop') + - ordered: bool, whether returned Categorical is ordered + + Returns: + Categorical, Series, or array + """ + +def qcut(x, q, labels=None, retbins=False, precision=3, duplicates='raise'): + """ + Quantile-based discretization function. + + Parameters: + - x: array-like, input array to be binned + - q: int or list-like of float, quantiles to compute + - labels: array or bool, labels for returned bins + - retbins: bool, return (bins, labels) + - precision: int, precision for bin labels + - duplicates: str, behavior for non-unique bin edges ('raise' or 'drop') + + Returns: + Categorical, Series, or array + """ + +def factorize(values, sort=False, na_sentinel=-1, use_na_sentinel=True, size_hint=None): + """ + Encode the object as an enumerated type or categorical variable. + + Parameters: + - values: sequence, 1-d array-like + - sort: bool, sort uniques + - na_sentinel: int, value for missing values + - use_na_sentinel: bool, use na_sentinel for missing values + - size_hint: int, hint for hashtable size + + Returns: + tuple of (codes, uniques) + """ + +def unique(values): + """ + Return unique values based on a hash table. 
+ + Parameters: + - values: 1d array-like + + Returns: + ndarray or ExtensionArray + """ + +def value_counts(values, sort=True, ascending=False, normalize=False, bins=None, dropna=True): + """ + Compute a histogram of the 1D array values. + + Parameters: + - values: 1d array-like + - sort: bool, sort by values + - ascending: bool, sort in ascending order + - normalize: bool, return relative frequencies + - bins: int, group into half-open bins + - dropna: bool, exclude NaN values + + Returns: + Series + """ +``` + +### Numeric Conversion + +Functions for converting data to numeric types. + +```python { .api } +def to_numeric(arg, errors='raise', downcast=None): + """ + Convert argument to a numeric type. + + Parameters: + - arg: scalar, list, tuple, 1-d array, or Series + - errors: str, error handling ('raise', 'coerce', 'ignore') + - downcast: str, downcast resulting data ('integer', 'signed', 'unsigned', 'float') + + Returns: + numeric, converted values + """ +``` + +### Groupby Statistical Operations + +Statistical methods available on GroupBy objects. + +```python { .api } +# Available on DataFrameGroupBy and SeriesGroupBy objects: + +class GroupBy: + """GroupBy object with statistical methods.""" + + def mean(self, numeric_only=True, engine=None, engine_kwargs=None): + """Compute mean of groups.""" + + def median(self, numeric_only=True): + """Compute median of groups.""" + + def sum(self, numeric_only=True, min_count=0, engine=None, engine_kwargs=None): + """Compute sum of groups.""" + + def min(self, numeric_only=False, min_count=-1, engine=None, engine_kwargs=None): + """Compute min of groups.""" + + def max(self, numeric_only=False, min_count=-1, engine=None, engine_kwargs=None): + """Compute max of groups.""" + + def std(self, ddof=1, engine=None, engine_kwargs=None, numeric_only=True): + """Compute standard deviation of groups.""" + + def var(self, ddof=1, engine=None, engine_kwargs=None, numeric_only=True): + """Compute variance of groups.""" + + def count(self): + """Compute count of group.""" + + def size(self): + """Compute group sizes.""" + + def nunique(self, dropna=True): + """Count number of unique values in each group.""" + + def quantile(self, q=0.5, interpolation='linear', numeric_only=True): + """Return values at given quantile for each group.""" + + def describe(self, percentiles=None, include=None, exclude=None): + """Generate descriptive statistics for each group.""" + + def sem(self, ddof=1, numeric_only=True): + """Compute standard error of the mean for each group.""" + + def rank(self, method='average', ascending=True, na_option='keep', pct=False, axis=0): + """Provide the rank of values within each group.""" + + def cumcount(self, ascending=True): + """Number each item in each group from 0 to the length of that group - 1.""" + + def cumsum(self, axis=0, **kwargs): + """Cumulative sum for each group.""" + + def cumprod(self, axis=0, **kwargs): + """Cumulative product for each group.""" + + def cummax(self, axis=0, numeric_only=False, **kwargs): + """Cumulative max for each group.""" + + def cummin(self, axis=0, numeric_only=False, **kwargs): + """Cumulative min for each group.""" + + def skew(self, axis=0, skipna=True, numeric_only=True, **kwargs): + """Return unbiased skew within groups.""" + + def kurt(self, axis=0, skipna=True, numeric_only=True, **kwargs): + """Return unbiased kurtosis within groups.""" + + def mad(self, **kwargs): + """Return mean absolute deviation within groups.""" + + def prod(self, numeric_only=True, min_count=0): + """Compute product 
of group values.""" + + def ohlc(self): + """Compute open, high, low and close values of a group.""" + + def first(self, numeric_only=False, min_count=-1): + """Return first value within each group.""" + + def last(self, numeric_only=False, min_count=-1): + """Return last value within each group.""" + + def nth(self, n, dropna=None): + """Take nth value, or subset if n is a list.""" + + def idxmax(self, axis=0, skipna=True): + """Return index of maximum value within each group.""" + + def idxmin(self, axis=0, skipna=True): + """Return index of minimum value within each group.""" +``` + +### Advanced Statistical Functions + +More specialized statistical operations and utilities. + +```python { .api } +# These functions work with DataFrame/Series or can be called independently: + +def pct_change(periods=1, fill_method='pad', limit=None, freq=None): + """ + Percentage change between current and prior element. + + Parameters: + - periods: int, periods to shift for forming percent change + - fill_method: str, how to handle NaNs before computing percent changes + - limit: int, number of consecutive NaNs to fill before stopping + - freq: DateOffset, Timedelta or str, increment to use for time rule + + Returns: + Series/DataFrame, percentage changes + """ + +def diff(periods=1, axis=0): + """ + First discrete difference of element. + + Parameters: + - periods: int, periods to shift for calculating difference + - axis: int, axis to shift along + + Returns: + Series/DataFrame, differences + """ + +def shift(periods=1, freq=None, axis=0, fill_value=None): + """ + Shift index by desired number of periods. + + Parameters: + - periods: int, number of periods to shift + - freq: DateOffset, Timedelta, or str, offset to use from time series API + - axis: int, axis to shift + - fill_value: object, scalar value to use for missing values + + Returns: + Series/DataFrame, shifted data + """ + +def expanding(min_periods=1, center=None, axis=0, method='single'): + """ + Provide expanding window calculations. + + Parameters: + - min_periods: int, minimum number of observations in window + - center: bool, whether result should be centered + - axis: int, axis along which to slide window + - method: str, execution method ('single' thread or 'table') + + Returns: + Expanding object + """ + +def rolling(window, min_periods=None, center=False, win_type=None, on=None, axis=0, closed=None, method='single'): + """ + Provide rolling window calculations. + + Parameters: + - window: int, size of moving window + - min_periods: int, minimum number of observations in window + - center: bool, whether result should be centered + - win_type: str, window type + - on: str, datetime-like column for DatetimeIndex + - axis: int, axis along which to slide window + - closed: str, make interval closed on 'right', 'left', 'both' or 'neither' + - method: str, execution method ('single' or 'table') + + Returns: + Rolling object + """ + +def ewm(com=None, span=None, halflife=None, alpha=None, min_periods=0, adjust=True, ignore_na=False, axis=0, times=None, method='single'): + """ + Provide exponentially weighted (EW) calculations. 
+ + Parameters: + - com: float, center of mass + - span: float, span + - halflife: float, decay in terms of half-life + - alpha: float, smoothing factor + - min_periods: int, minimum number of observations + - adjust: bool, divide by decaying adjustment factor + - ignore_na: bool, ignore missing values + - axis: int, axis along which to calculate + - times: array-like, times corresponding to observations + - method: str, execution method ('single' or 'table') + + Returns: + ExponentialMovingWindow object + """ +``` + +## Types + +```python { .api } +# Statistical method options +StatMethod = Literal['average', 'min', 'max', 'first', 'dense'] +CorrelationMethod = Literal['pearson', 'kendall', 'spearman'] +InterpolationMethod = Literal['linear', 'lower', 'higher', 'midpoint', 'nearest'] +QuantileInterpolation = Literal['linear', 'lower', 'higher', 'midpoint', 'nearest'] + +# Ranking options +RankMethod = Literal['average', 'min', 'max', 'first', 'dense'] +RankNaOption = Literal['keep', 'top', 'bottom'] + +# Numeric conversion options +NumericErrors = Literal['raise', 'coerce', 'ignore'] +DowncastOptions = Literal['integer', 'signed', 'unsigned', 'float'] + +# Binning options +BinningDuplicates = Literal['raise', 'drop'] +IntervalInclusive = Literal['both', 'neither', 'left', 'right'] + +# Window calculation options +WindowMethod = Literal['single', 'table'] +WindowType = Literal[ + 'boxcar', 'triang', 'blackman', 'hamming', 'bartlett', 'parzen', + 'bohman', 'blackmanharris', 'nuttall', 'barthann', 'kaiser', + 'gaussian', 'general_gaussian', 'slepian', 'exponential' +] + +# Percentile inclusion options +PercentileInclusive = Literal['both', 'neither', 'left', 'right'] + +# Axis specification +AxisOption = Union[int, str, None] +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-pandas/docs/time-series.md b/.tessl/tiles/tessl/pypi-pandas/docs/time-series.md new file mode 100644 index 0000000..a790f4f --- /dev/null +++ b/.tessl/tiles/tessl/pypi-pandas/docs/time-series.md @@ -0,0 +1,539 @@ +# Time Series and Date Handling + +Comprehensive time series functionality including date/time parsing, time zone handling, frequency conversion, resampling, and specialized time-based operations. + +## Core Imports + +```python +import pandas as pd +from pandas import date_range, to_datetime, Timestamp, Timedelta +``` + +## Capabilities + +### Date and Time Creation + +Functions to create and manipulate date/time objects and ranges. + +```python { .api } +def date_range(start=None, end=None, periods=None, freq=None, tz=None, normalize=False, name=None, inclusive='both', **kwargs): + """ + Return a fixed frequency DatetimeIndex. + + Parameters: + - start: str or datetime-like, left bound for generating dates + - end: str or datetime-like, right bound for generating dates + - periods: int, number of periods to generate + - freq: str or DateOffset, frequency string ('D', 'B', 'H', 'T', 'S', 'MS', etc.) + - tz: str or tzinfo, time zone name for localized DatetimeIndex + - normalize: bool, normalize start/end dates to midnight + - name: str, name of the resulting DatetimeIndex + - inclusive: str, whether to include both endpoints ('both', 'neither', 'left', 'right') + + Returns: + DatetimeIndex + """ + +def bdate_range(start=None, end=None, periods=None, freq='B', tz=None, normalize=True, name=None, weekmask=None, holidays=None, inclusive='both', **kwargs): + """ + Return a fixed frequency DatetimeIndex with business day default. 
+ + Parameters: + - start: str or datetime-like, left bound for generating dates + - end: str or datetime-like, right bound for generating dates + - periods: int, number of periods to generate + - freq: str or DateOffset, frequency string (default 'B' for business day) + - weekmask: str or None, weekmask of valid business days + - holidays: list-like or None, dates to exclude from valid business days + + Returns: + DatetimeIndex + """ + +def period_range(start=None, end=None, periods=None, freq=None, name=None): + """ + Return a fixed frequency PeriodIndex. + + Parameters: + - start: str or Period, left bound for generating periods + - end: str or Period, right bound for generating periods + - periods: int, number of periods to generate + - freq: str or DateOffset, frequency string + - name: str, name of the resulting PeriodIndex + + Returns: + PeriodIndex + """ + +def timedelta_range(start=None, end=None, periods=None, freq=None, name=None, closed=None): + """ + Return a fixed frequency TimedeltaIndex. + + Parameters: + - start: str or timedelta, left bound for generating timedeltas + - end: str or timedelta, right bound for generating timedeltas + - periods: int, number of periods to generate + - freq: str or DateOffset, frequency string + - name: str, name of the resulting TimedeltaIndex + - closed: str, make interval closed on 'left', 'right' or 'both' sides + + Returns: + TimedeltaIndex + """ + +def interval_range(start=0, end=None, periods=None, freq=None, name=None, closed='right'): + """ + Return a fixed frequency IntervalIndex. + + Parameters: + - start: numeric or datetime-like, left bound for generating intervals + - end: numeric or datetime-like, right bound for generating intervals + - periods: int, number of periods to generate + - freq: numeric, datetime-like, or offset string, length of each interval + - name: str, name of the resulting IntervalIndex + - closed: str, whether intervals are closed on left, right, both, or neither + + Returns: + IntervalIndex + """ +``` + +### Date and Time Conversion + +Functions to parse and convert various date/time formats. + +```python { .api } +def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, utc=None, format=None, exact=True, unit=None, infer_datetime_format=False, origin='unix', cache=True): + """ + Convert argument to datetime. + + Parameters: + - arg: int, float, str, datetime, list, tuple, 1-d array, Series, DataFrame/dict-like + - errors: str, error handling behavior ('raise', 'coerce', 'ignore') + - dayfirst: bool, interpret first value as day (DD/MM vs MM/DD) + - yearfirst: bool, interpret first value as year + - utc: bool, return UTC DatetimeIndex if True + - format: str, strftime format to parse time + - exact: bool, control how format is used + - unit: str, unit of numeric arg ('D', 's', 'ms', 'us', 'ns') + - infer_datetime_format: bool, attempt to infer format automatically + - origin: scalar, define reference date ('unix', '1900-01-01') + - cache: bool, use cache of unique, converted dates + + Returns: + datetime, Timestamp, DatetimeIndex + """ + +def to_timedelta(arg, unit=None, errors='raise'): + """ + Convert argument to timedelta. + + Parameters: + - arg: str, timedelta, list-like, or Series + - unit: str, unit of arg when arg is numeric ('D', 'h', 'm', 's', 'ms', 'us', 'ns') + - errors: str, error handling behavior ('raise', 'coerce', 'ignore') + + Returns: + timedelta, TimedeltaIndex, Series + """ + +def infer_freq(index, warn=True): + """ + Infer most likely frequency given input index. 
+ + Parameters: + - index: DatetimeIndex or TimedeltaIndex + - warn: bool, warn if frequency cannot be inferred + + Returns: + str or None, inferred frequency + """ +``` + +### Core Time Objects + +Core pandas time-based scalar types for representing dates, times, and intervals. + +```python { .api } +class Timestamp: + def __init__(self, ts_input=None, freq=None, tz=None, unit=None, year=None, month=None, day=None, hour=None, minute=None, second=None, microsecond=None, nanosecond=None, tzinfo=None, fold=None): + """ + Pandas replacement for datetime.datetime. + + Parameters: + - ts_input: datetime-like, str, int, float + - freq: str or DateOffset, offset which Timestamp will have + - tz: str, pytz.timezone, dateutil.tz.tzfile or None + - unit: str, unit of ts_input if ts_input is int or float + - year, month, day, hour, minute, second, microsecond, nanosecond: int + """ + + def normalize(self): + """Return timestamp truncated to midnight.""" + + def tz_localize(self, tz, ambiguous='raise', nonexistent='raise'): + """Localize timestamp to given timezone.""" + + def tz_convert(self, tz): + """Convert timestamp to given timezone.""" + + def strftime(self, format): + """Format timestamp using strftime.""" + + def isoformat(self, sep='T', timespec='auto'): + """Return ISO 8601 formatted string.""" + + def timestamp(self): + """Return POSIX timestamp.""" + +class Timedelta: + def __init__(self, value=None, unit=None, **kwargs): + """ + Represents a duration between two dates or times. + + Parameters: + - value: Timedelta, timedelta, np.timedelta64, str, or int + - unit: str, unit for value if value is numeric + """ + + def total_seconds(self): + """Total seconds in the timedelta.""" + + def to_pytimedelta(self): + """Convert to python datetime.timedelta.""" + + def to_timedelta64(self): + """Convert to numpy.timedelta64.""" + +class Period: + def __init__(self, value=None, freq=None, ordinal=None, year=None, month=None, day=None, hour=None, minute=None, second=None): + """ + Represents a period of time. + + Parameters: + - value: Period, str, datetime, int + - freq: str or DateOffset + - ordinal: int, period ordinal value + """ + + def asfreq(self, freq, how='E'): + """Convert Period to desired frequency.""" + + def to_timestamp(self, freq=None, how='start'): + """Return Timestamp representation of Period.""" + + def strftime(self, format): + """Format Period using strftime.""" + +class Interval: + def __init__(self, left, right, closed='right'): + """ + An interval of values. + + Parameters: + - left: orderable scalar, left bound of interval + - right: orderable scalar, right bound of interval + - closed: str, whether interval is closed ('left', 'right', 'both', 'neither') + """ + + def overlaps(self, other): + """Check whether two intervals overlap.""" + + def contains(self, other): + """Check whether interval contains other.""" + + @property + def length(self): + """Return length of interval.""" + + @property + def mid(self): + """Return midpoint of interval.""" + +class DateOffset: + def __init__(self, n=1, normalize=False): + """ + Standard kind of date increment used for a date range. + + Parameters: + - n: int, number of time periods + - normalize: bool, normalize start/end dates to midnight + """ + + def apply(self, other): + """Apply offset to datetime.""" + + def rollforward(self, dt): + """Roll date forward to next offset.""" + + def rollback(self, dt): + """Roll date backward to previous offset.""" +``` + +### Time Zone Handling + +Functions and methods for working with time zones. 
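+
+A brief, illustrative sketch of typical timezone handling (the zone names and dates are examples only, not part of the API listing that follows):
+
+```python
+import pandas as pd
+
+# Attach a timezone to a naive timestamp, then convert it
+ts = pd.Timestamp("2023-06-01 12:00:00")
+ts_utc = ts.tz_localize("UTC")
+ts_ny = ts_utc.tz_convert("America/New_York")
+print(ts_ny)
+
+# Timezone-aware DatetimeIndex created directly, then converted
+idx = pd.date_range("2023-01-01", periods=3, freq="D", tz="UTC")
+print(idx.tz_convert("Europe/Berlin"))
+```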
+ +```python { .api } +# These are methods of DatetimeIndex and Timestamp: +# .tz_localize(tz, ambiguous='raise', nonexistent='raise') - attach timezone to naive datetime +# .tz_convert(tz) - convert timezone-aware datetime to another timezone + +# Common timezone operations +def show_timezones(): + """Show list of available time zones.""" + +# Time zone constants and utilities (accessed via pandas) +import pandas as pd +# pd.Timestamp.now() - current timestamp +# pd.Timestamp.utcnow() - current UTC timestamp +# pd.Timestamp.today() - current local timestamp + +# Timezone-aware operations +# pd.date_range(..., tz='UTC') - create timezone-aware DatetimeIndex +# pd.to_datetime(..., utc=True) - parse as UTC +``` + +### Resampling and Frequency Conversion + +Methods for changing the frequency of time series data. + +```python { .api } +# These are methods of DataFrame/Series with DatetimeIndex: +# .resample(rule, axis=0, closed=None, label=None, convention='start', kind=None, loffset=None, base=None, on=None, level=None, origin='start_day', offset=None, group_keys=False) +# Returns Resampler object with aggregation methods: + +class Resampler: + """GroupBy-like object for resampling operations.""" + + def mean(self, numeric_only=False): + """Compute mean of groups.""" + + def sum(self, numeric_only=False, min_count=0): + """Compute sum of groups.""" + + def min(self, numeric_only=False): + """Compute min of groups.""" + + def max(self, numeric_only=False): + """Compute max of groups.""" + + def count(self): + """Compute count of groups.""" + + def std(self, ddof=1, numeric_only=False): + """Compute standard deviation of groups.""" + + def var(self, ddof=1, numeric_only=False): + """Compute variance of groups.""" + + def first(self, numeric_only=False, min_count=0): + """Compute first value of groups.""" + + def last(self, numeric_only=False, min_count=0): + """Compute last value of groups.""" + + def median(self, numeric_only=False): + """Compute median of groups.""" + + def ohlc(self): + """Compute open, high, low, close values.""" + + def apply(self, func, *args, **kwargs): + """Apply function to each group.""" + + def aggregate(self, func=None, *args, **kwargs): + """Aggregate using one or more operations.""" + + def transform(self, arg, *args, **kwargs): + """Transform using one or more operations.""" + + def interpolate(self, method='linear', axis=0, limit=None, inplace=False, limit_direction=None, limit_area=None, downcast=None, **kwargs): + """Interpolate values according to different methods.""" + + def asfreq(self, fill_value=None): + """Convert to specified frequency.""" + + def fillna(self, value=None, method=None, axis=None, inplace=False, limit=None, downcast=None): + """Fill missing values in resampled data.""" + +# Frequency conversion without aggregation +# .asfreq(freq, method=None, how=None, normalize=False, fill_value=None) +``` + +### Date Offsets and Business Calendar + +Specialized offset classes for date arithmetic and business calendar operations. 
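+
+A minimal sketch of offset arithmetic (dates chosen for illustration; `BDay` and `MonthEnd` are among the standard aliases shown in the import list below):
+
+```python
+import pandas as pd
+from pandas.tseries.offsets import BDay, MonthEnd
+
+ts = pd.Timestamp("2023-06-02")   # a Friday
+print(ts + BDay(1))               # next business day: Monday 2023-06-05
+print(ts + MonthEnd(1))           # roll forward to month end: 2023-06-30
+
+# Offsets can also serve as the freq argument of date_range
+print(pd.date_range("2023-01-02", periods=4, freq=BDay()))
+```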
+ +```python { .api } +# Import from pandas.tseries.offsets +from pandas.tseries.offsets import ( + Day, Hour, Minute, Second, Milli, Micro, Nano, + BusinessDay, BDay, + CustomBusinessDay, CDay, + BusinessHour, CustomBusinessHour, + MonthEnd, BMonthEnd, MonthBegin, BMonthBegin, + SemiMonthEnd, SemiMonthBegin, + QuarterEnd, BQuarterEnd, QuarterBegin, BQuarterBegin, + YearEnd, BYearEnd, YearBegin, BYearBegin, + Week, WeekOfMonth, LastWeekOfMonth, + FY5253, FY5253Quarter, + Easter +) + +# Business day offset with custom calendar +class CustomBusinessDay(DateOffset): + def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', holidays=None, calendar=None, offset=timedelta(0)): + """ + Custom business day offset. + + Parameters: + - n: int, number of periods + - weekmask: str or None, weekmask of valid business days + - holidays: list-like, dates to exclude from valid business days + - calendar: AbstractHolidayCalendar, holiday calendar to use + - offset: timedelta, time offset to apply + """ + +# Holiday calendar support +class AbstractHolidayCalendar: + """Abstract base class for holiday calendars.""" + + def holidays(self, start=None, end=None, return_name=False): + """Return holidays between start and end dates.""" + +class USFederalHolidayCalendar(AbstractHolidayCalendar): + """US Federal Holiday Calendar.""" + pass + +# Business hour offset +class BusinessHour(DateOffset): + def __init__(self, n=1, normalize=False, start='09:00', end='17:00', offset=timedelta(0)): + """ + Business hour offset. + + Parameters: + - n: int, number of periods + - start: str, start time of business hours + - end: str, end time of business hours + - offset: timedelta, time offset to apply + """ +``` + +### Rolling Window Operations + +Statistical operations over rolling windows of time series data. 
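+
+A short sketch of the window entry points documented below (values are illustrative):
+
+```python
+import numpy as np
+import pandas as pd
+
+s = pd.Series(np.arange(10, dtype=float),
+              index=pd.date_range("2023-01-01", periods=10, freq="D"))
+
+print(s.rolling(window=3).mean())          # fixed-size rolling window
+print(s.rolling("3D").sum())               # time-based window on a DatetimeIndex
+print(s.expanding(min_periods=2).max())    # expanding window
+print(s.ewm(span=3, adjust=False).mean())  # exponentially weighted mean
+```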
+ +```python { .api } +# These are methods of DataFrame/Series: +# .rolling(window, min_periods=None, center=False, win_type=None, on=None, axis=0, closed=None, step=None, method='single') +# Returns Rolling object with statistical methods: + +class Rolling: + """Provides rolling window calculations.""" + + def count(self): + """Count of non-null observations.""" + + def sum(self, numeric_only=False, engine=None, engine_kwargs=None): + """Sum of values.""" + + def mean(self, numeric_only=False, engine=None, engine_kwargs=None): + """Mean of values.""" + + def median(self, numeric_only=False, engine=None, engine_kwargs=None): + """Median of values.""" + + def var(self, ddof=1, numeric_only=False, engine=None, engine_kwargs=None): + """Variance of values.""" + + def std(self, ddof=1, numeric_only=False, engine=None, engine_kwargs=None): + """Standard deviation of values.""" + + def min(self, numeric_only=False, engine=None, engine_kwargs=None): + """Min of values.""" + + def max(self, numeric_only=False, engine=None, engine_kwargs=None): + """Max of values.""" + + def corr(self, other=None, pairwise=None, ddof=1, numeric_only=False): + """Correlation of values.""" + + def cov(self, other=None, pairwise=None, ddof=1, numeric_only=False): + """Covariance of values.""" + + def skew(self, numeric_only=False): + """Skewness of values.""" + + def kurt(self, numeric_only=False): + """Kurtosis of values.""" + + def apply(self, func, raw=False, engine=None, engine_kwargs=None, args=None, kwargs=None): + """Apply function to rolling window.""" + + def aggregate(self, func, *args, **kwargs): + """Aggregate using one or more operations.""" + + def quantile(self, quantile, interpolation='linear', numeric_only=False): + """Quantile of values.""" + +# Expanding window operations +# .expanding(min_periods=1, center=None, axis=0, method='single') +# Returns Expanding object with same methods as Rolling + +class Expanding: + """Provides expanding window calculations.""" + # Same methods as Rolling class + pass + +# Exponentially weighted operations +# .ewm(com=None, span=None, halflife=None, alpha=None, min_periods=0, adjust=True, ignore_na=False, axis=0, times=None, method='single') +# Returns ExponentialMovingWindow object + +class ExponentialMovingWindow: + """Provides exponentially weighted calculations.""" + + def mean(self, numeric_only=False, engine=None, engine_kwargs=None): + """Exponentially weighted moving average.""" + + def var(self, bias=False, numeric_only=False, engine=None, engine_kwargs=None): + """Exponentially weighted moving variance.""" + + def std(self, bias=False, numeric_only=False, engine=None, engine_kwargs=None): + """Exponentially weighted moving standard deviation.""" + + def corr(self, other=None, pairwise=None, numeric_only=False): + """Exponentially weighted moving correlation.""" + + def cov(self, other=None, pairwise=None, bias=False, numeric_only=False): + """Exponentially weighted moving covariance.""" +``` + +## Types + +```python { .api } +# Frequency strings +FrequencyStr = Literal[ + 'B', 'C', 'D', 'W', 'M', 'SM', 'BM', 'CBM', 'MS', 'SMS', 'BMS', 'CBMS', + 'Q', 'BQ', 'QS', 'BQS', 'A', 'Y', 'BA', 'BY', 'AS', 'YS', 'BAS', 'BYS', + 'BH', 'H', 'T', 'min', 'S', 'L', 'ms', 'U', 'us', 'N', 'ns' +] + +# Time zone types +TimeZone = Union[str, datetime.tzinfo, None] + +# Resample rule types +ResampleRule = Union[str, DateOffset] + +# Date parse error handling +DateParseError = Literal['raise', 'coerce', 'ignore'] + +# Timestamp origin types +TimestampOrigin = 
Union[Literal['unix'], Timestamp, str] + +# Missing value sentinels for datetime +NaT: object # Not-a-Time, pandas equivalent of NaN for datetime + +# Interval closed options +IntervalClosed = Literal['left', 'right', 'both', 'neither'] +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-pandas/tile.json b/.tessl/tiles/tessl/pypi-pandas/tile.json new file mode 100644 index 0000000..2878318 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-pandas/tile.json @@ -0,0 +1,7 @@ +{ + "name": "tessl/pypi-pandas", + "version": "2.3.0", + "docs": "docs/index.md", + "describes": "pkg:pypi/pandas@2.3.2", + "summary": "Powerful data structures for data analysis, time series, and statistics" +} \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-pyarrow/docs/advanced-features.md b/.tessl/tiles/tessl/pypi-pyarrow/docs/advanced-features.md new file mode 100644 index 0000000..758f994 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-pyarrow/docs/advanced-features.md @@ -0,0 +1,973 @@ +# Advanced Features + +Specialized functionality including CUDA GPU support, Substrait query integration, execution engine operations, and data interchange protocols for advanced use cases and system integration scenarios. + +## Capabilities + +### CUDA GPU Support + +GPU memory management and operations for high-performance computing workloads using NVIDIA CUDA. + +```python { .api } +class Context: + """ + CUDA context wrapper for device operations. + + Attributes: + - device_number: CUDA device number + - handle: CUDA context handle + """ + + def __init__(self, device_number=0): ... + + def memory_manager(self): + """Get CUDA memory manager.""" + + def synchronize(self): + """Synchronize CUDA operations.""" + + @property + def device_number(self): + """Get device number.""" + +class CudaBuffer: + """ + GPU memory buffer. + + Attributes: + - context: CUDA context + - size: Buffer size in bytes + - address: GPU memory address + - is_mutable: Whether buffer is mutable + """ + + def copy_to_host(self, position=0, nbytes=None, memory_pool=None): + """Copy data from GPU to host memory.""" + + def copy_from_host(self, data, position=0): + """Copy data from host to GPU memory.""" + + def copy_from_device(self, buf, position=0, source_position=0, nbytes=None): + """Copy data from another GPU buffer.""" + + def slice(self, offset, length=None): + """Create buffer slice.""" + + def equals(self, other): + """Check buffer equality.""" + + def export_for_ipc(self): + """Export buffer for inter-process communication.""" + +class HostBuffer: + """ + Pinned host memory buffer for efficient GPU transfers. + + Attributes: + - size: Buffer size in bytes + - address: Host memory address + """ + +class IpcMemHandle: + """ + Inter-process communication memory handle. + + Attributes: + - handle: IPC handle bytes + """ + + def open(self, context): + """Open IPC handle in context.""" + + def serialize(self): + """Serialize handle for IPC.""" + + @classmethod + def from_buffer(cls, buf): + """Create handle from CUDA buffer.""" + +class BufferReader: + """Reader for CUDA buffers.""" + + def __init__(self, buffer): ... + + def read(self, nbytes=None): + """Read data from buffer.""" + + def seek(self, position): + """Seek to position.""" + + def tell(self): + """Get current position.""" + +class BufferWriter: + """Writer for CUDA buffers.""" + + def __init__(self, buffer): ... 
+ + def write(self, data): + """Write data to buffer.""" + + def seek(self, position): + """Seek to position.""" + + def tell(self): + """Get current position.""" + +def new_host_buffer(size, device_number=0): + """ + Create new pinned host buffer. + + Parameters: + - size: int, buffer size in bytes + - device_number: int, CUDA device number + + Returns: + HostBuffer: Pinned host buffer + """ + +def serialize_record_batch(batch, ctx): + """ + Serialize record batch for CUDA transfer. + + Parameters: + - batch: RecordBatch, batch to serialize + - ctx: Context, CUDA context + + Returns: + bytes: Serialized batch + """ + +def read_message(source, memory_pool=None): + """ + Read CUDA IPC message. + + Parameters: + - source: file-like, message source + - memory_pool: MemoryPool, memory pool for allocation + + Returns: + Message: CUDA message + """ + +def read_record_batch(message, schema, memory_pool=None): + """ + Read record batch from CUDA message. + + Parameters: + - message: Message, CUDA message + - schema: Schema, batch schema + - memory_pool: MemoryPool, memory pool for allocation + + Returns: + RecordBatch: Record batch + """ +``` + +### Substrait Query Integration + +Integration with Substrait for standardized query representation and cross-system compatibility. + +```python { .api } +def run_query(plan, table_provider=None): + """ + Execute Substrait query plan. + + Parameters: + - plan: bytes, serialized Substrait plan + - table_provider: callable, function to provide tables by name + + Returns: + Table: Query result table + """ + +def get_supported_functions(): + """ + Get list of supported Substrait functions. + + Returns: + list of str: Supported function names + """ + +def deserialize_expressions(data, schema): + """ + Deserialize Substrait expressions. + + Parameters: + - data: bytes, serialized Substrait expressions + - schema: Schema, input schema + + Returns: + BoundExpressions: Bound expressions with Arrow types + """ + +def serialize_expressions(expressions, names, schema): + """ + Serialize Arrow expressions to Substrait. + + Parameters: + - expressions: list of Expression, Arrow expressions + - names: list of str, expression names + - schema: Schema, input schema + + Returns: + bytes: Serialized Substrait expressions + """ + +def deserialize_schema(data): + """ + Deserialize Substrait schema. + + Parameters: + - data: bytes, serialized Substrait schema + + Returns: + SubstraitSchema: Substrait schema representation + """ + +def serialize_schema(schema): + """ + Serialize Arrow schema to Substrait. + + Parameters: + - schema: Schema, Arrow schema + + Returns: + bytes: Serialized Substrait schema + """ + +class BoundExpressions: + """ + Bound Substrait expressions with Arrow types. + + Attributes: + - expressions: List of bound expressions + - schema: Input schema + """ + + def evaluate(self, batch): + """Evaluate expressions on record batch.""" + +class SubstraitSchema: + """ + Substrait schema representation. + + Attributes: + - names: Field names + - types: Field types + """ + + def to_arrow_schema(self): + """Convert to Arrow schema.""" +``` + +### Acero Execution Engine + +Low-level execution engine operations for building custom query processing pipelines. + +```python { .api } +class Declaration: + """ + Execution plan node declaration. + + Attributes: + - factory_name: Node factory name + - options: Node options + - inputs: Input declarations + """ + + def __init__(self, factory_name, options, inputs=None): ... 
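+
+# Note (assumed usage, not part of the options listing below): a Declaration
+# tree is typically executed with Declaration.to_table() or
+# Declaration.to_reader(), e.g.
+#   acero.Declaration("table_source", acero.TableSourceNodeOptions(tbl)).to_table()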
+ +class ExecNodeOptions: + """Base execution node options.""" + +class TableSourceNodeOptions(ExecNodeOptions): + """ + Table source node configuration. + + Attributes: + - table: Source table + """ + + def __init__(self, table): ... + +class FilterNodeOptions(ExecNodeOptions): + """ + Filter node configuration. + + Attributes: + - filter_expression: Filter expression + """ + + def __init__(self, filter_expression): ... + +class ProjectNodeOptions(ExecNodeOptions): + """ + Projection node configuration. + + Attributes: + - expressions: Projection expressions + - names: Output field names + """ + + def __init__(self, expressions, names=None): ... + +class AggregateNodeOptions(ExecNodeOptions): + """ + Aggregation node configuration. + + Attributes: + - aggregates: Aggregate functions + - keys: Grouping keys + """ + + def __init__(self, aggregates, keys=None): ... + +class OrderByNodeOptions(ExecNodeOptions): + """ + Sorting node configuration. + + Attributes: + - sort_keys: Sort key expressions + - ordering: Sort ordering (ascending/descending) + """ + + def __init__(self, sort_keys, ordering=None): ... + +class HashJoinNodeOptions(ExecNodeOptions): + """ + Hash join node configuration. + + Attributes: + - join_type: Type of join + - left_keys: Left join keys + - right_keys: Right join keys + - filter: Optional join filter + """ + + def __init__(self, join_type, left_keys, right_keys, filter=None): ... + +class AsofJoinNodeOptions(ExecNodeOptions): + """ + As-of join node configuration. + + Attributes: + - left_keys: Left join keys + - right_keys: Right join keys + - on_key: Temporal join key + - tolerance: Join tolerance + """ + + def __init__(self, left_keys, right_keys, on_key, tolerance=None): ... + +class ScanNodeOptions(ExecNodeOptions): + """ + Dataset scan node configuration. + + Attributes: + - dataset: Dataset to scan + - filter: Scan filter + - projection: Column projection + """ + + def __init__(self, dataset, filter=None, projection=None): ... +``` + +### Data Interchange Protocol + +Support for data interchange protocols enabling interoperability with other data systems. + +```python { .api } +def from_dataframe(df, preserve_index=None, types_mapper=None): + """ + Convert dataframe interchange object to Arrow Table. + + Parameters: + - df: object implementing dataframe interchange protocol + - preserve_index: bool, preserve dataframe index + - types_mapper: callable, custom type mapping function + + Returns: + Table: Arrow table from dataframe interchange object + """ +``` + +### JVM Integration + +Integration with Java Virtual Machine for interoperability with Java-based systems. + +```python { .api } +def set_default_jvm_path(path): + """ + Set default JVM path. + + Parameters: + - path: str, path to JVM library + """ + +def get_default_jvm_path(): + """ + Get default JVM path. + + Returns: + str: JVM library path + """ + +def set_default_jvm_options(options): + """ + Set default JVM options. + + Parameters: + - options: list of str, JVM startup options + """ + +def get_default_jvm_options(): + """ + Get default JVM options. + + Returns: + list of str: JVM startup options + """ +``` + +### Configuration and Environment + +Global configuration and environment management for PyArrow behavior. + +```python { .api } +def get_include(): + """ + Get Arrow C++ include directory path. + + Returns: + str: Include directory path + """ + +def get_libraries(): + """ + Get list of libraries for linking. 
+ + Returns: + list of str: Library names + """ + +def get_library_dirs(): + """ + Get library directories for linking. + + Returns: + list of str: Library directory paths + """ + +def create_library_symlinks(): + """Create library symlinks for wheel installations.""" + +def set_timezone_db_path(path): + """ + Set timezone database path. + + Parameters: + - path: str, path to timezone database + """ + +def cpu_count(): + """ + Get number of CPU cores. + + Returns: + int: Number of CPU cores + """ + +def set_cpu_count(count): + """ + Set CPU core count for computations. + + Parameters: + - count: int, number of CPU cores to use + """ + +def io_thread_count(): + """ + Get I/O thread count. + + Returns: + int: Number of I/O threads + """ + +def set_io_thread_count(count): + """ + Set I/O thread count. + + Parameters: + - count: int, number of I/O threads to use + """ + +def enable_signal_handlers(enable): + """ + Enable/disable signal handling. + + Parameters: + - enable: bool, whether to enable signal handlers + """ +``` + +## Usage Examples + +### CUDA GPU Operations + +```python +import pyarrow as pa + +# Check if CUDA is available +try: + import pyarrow.cuda as cuda + print("CUDA support available") +except ImportError: + print("CUDA support not available") + exit() + +# Create CUDA context +ctx = cuda.Context(device_number=0) +print(f"CUDA device: {ctx.device_number}") + +# Create host buffer +host_data = b"Hello, CUDA!" * 1000 +host_buffer = cuda.new_host_buffer(len(host_data)) + +# Copy data to host buffer (conceptual - actual API may differ) +# host_buffer.copy_from_host(host_data) + +# Create GPU buffer +gpu_buffer = ctx.memory_manager().allocate(len(host_data)) + +# Copy from host to GPU +gpu_buffer.copy_from_host(host_data) + +# Copy back to host +result_buffer = gpu_buffer.copy_to_host() +print(f"GPU round-trip successful: {len(result_buffer)} bytes") + +# Create Arrow array on GPU (conceptual) +cpu_array = pa.array([1, 2, 3, 4, 5]) +# Note: Actual GPU array creation would require more setup + +# IPC with GPU buffers +ipc_handle = cuda.IpcMemHandle.from_buffer(gpu_buffer) +serialized_handle = ipc_handle.serialize() +print(f"Serialized IPC handle: {len(serialized_handle)} bytes") + +# Clean up +ctx.synchronize() +``` + +### Substrait Query Integration + +```python +import pyarrow as pa +import pyarrow.substrait as substrait +import pyarrow.compute as pc + +# Check supported Substrait functions +supported_functions = substrait.get_supported_functions() +print(f"Supported functions: {len(supported_functions)}") +print(f"First 10: {supported_functions[:10]}") + +# Create sample data +table = pa.table({ + 'id': range(100), + 'category': ['A', 'B', 'C'] * 34, # Cycling through categories + 'value': [i * 1.5 for i in range(100)] +}) + +# Define table provider for Substrait +def table_provider(names): + """Provide tables by name for Substrait execution.""" + if names == ['main_table']: + return table + else: + raise ValueError(f"Unknown table: {names}") + +# Example: Simple filter query (conceptual) +# In practice, you would create or receive a Substrait plan +# This is a simplified example showing the concept + +# Create expressions and serialize to Substrait +expressions = [ + pc.field('value'), + pc.greater(pc.field('value'), pc.scalar(50)) +] +names = ['value', 'filter_condition'] + +try: + # Serialize expressions to Substrait format + serialized_expressions = substrait.serialize_expressions( + expressions, names, table.schema + ) + print(f"Serialized expressions: 
{len(serialized_expressions)} bytes") + + # Deserialize expressions back + bound_expressions = substrait.deserialize_expressions( + serialized_expressions, table.schema + ) + print(f"Bound expressions: {bound_expressions}") + +except Exception as e: + print(f"Substrait operations not fully available: {e}") + +# Schema serialization +try: + serialized_schema = substrait.serialize_schema(table.schema) + print(f"Serialized schema: {len(serialized_schema)} bytes") + + deserialized_schema = substrait.deserialize_schema(serialized_schema) + print(f"Deserialized schema: {deserialized_schema}") + +except Exception as e: + print(f"Schema serialization not available: {e}") +``` + +### Acero Execution Engine + +```python +import pyarrow as pa +import pyarrow.acero as acero +import pyarrow.compute as pc + +# Create sample tables +table1 = pa.table({ + 'id': [1, 2, 3, 4, 5], + 'name': ['Alice', 'Bob', 'Charlie', 'Diana', 'Eve'], + 'dept_id': [10, 20, 10, 30, 20] +}) + +table2 = pa.table({ + 'dept_id': [10, 20, 30], + 'dept_name': ['Engineering', 'Sales', 'Marketing'] +}) + +# Create execution plan declarations +source1 = acero.Declaration( + "table_source", + acero.TableSourceNodeOptions(table1) +) + +source2 = acero.Declaration( + "table_source", + acero.TableSourceNodeOptions(table2) +) + +# Filter declaration +filter_decl = acero.Declaration( + "filter", + acero.FilterNodeOptions(pc.greater(pc.field('id'), pc.scalar(2))), + inputs=[source1] +) + +# Projection declaration +project_decl = acero.Declaration( + "project", + acero.ProjectNodeOptions([ + pc.field('id'), + pc.field('name'), + pc.field('dept_id') + ]), + inputs=[filter_decl] +) + +# Join declaration +join_decl = acero.Declaration( + "hashjoin", + acero.HashJoinNodeOptions( + join_type="inner", + left_keys=[pc.field('dept_id')], + right_keys=[pc.field('dept_id')] + ), + inputs=[project_decl, source2] +) + +print("Created execution plan with filter, projection, and join") +print("Note: Actual execution requires Acero runtime") + +# Example of aggregation node +agg_decl = acero.Declaration( + "aggregate", + acero.AggregateNodeOptions( + aggregates=[ + ("count", pc.field('id')), + ("mean", pc.field('id')) + ], + keys=[pc.field('dept_name')] + ), + inputs=[join_decl] +) + +print("Added aggregation node to execution plan") +``` + +### Data Interchange Protocol + +```python +import pyarrow as pa +import pyarrow.interchange as interchange + +# Create a mock dataframe-like object that implements interchange protocol +class MockDataFrame: + """Mock dataframe implementing interchange protocol.""" + + def __init__(self, data): + self.data = data + self._schema = pa.schema([ + pa.field(name, pa.infer_type(column)) + for name, column in data.items() + ]) + + def __dataframe__(self, nan_as_null=False, allow_copy=True): + """Implement dataframe interchange protocol.""" + # This is a simplified mock - real implementation would be more complex + return self + + def select_columns(self, indices): + """Select columns by indices.""" + selected_data = {} + for i, (name, column) in enumerate(self.data.items()): + if i in indices: + selected_data[name] = column + return MockDataFrame(selected_data) + + def get_chunks(self, n_chunks=None): + """Get data chunks.""" + # Simplified - return single chunk + return [self] + + def to_arrow_table(self): + """Convert to Arrow table.""" + return pa.table(self.data, schema=self._schema) + +# Create mock dataframe +mock_df_data = { + 'integers': [1, 2, 3, 4, 5], + 'floats': [1.1, 2.2, 3.3, 4.4, 5.5], + 'strings': ['a', 'b', 
'c', 'd', 'e'] +} +mock_df = MockDataFrame(mock_df_data) + +try: + # Convert using interchange protocol + table = interchange.from_dataframe(mock_df) + print(f"Converted table: {table.schema}") + print(f"Rows: {len(table)}") + +except Exception as e: + print(f"Interchange conversion failed: {e}") + # Fallback to direct conversion + table = mock_df.to_arrow_table() + print(f"Direct conversion: {table.schema}") + +# Work with real pandas DataFrame (if available) +try: + import pandas as pd + + # Create pandas DataFrame + df = pd.DataFrame({ + 'x': range(10), + 'y': [i ** 2 for i in range(10)], + 'category': ['A', 'B'] * 5 + }) + + # Convert using interchange protocol + table_from_pandas = interchange.from_dataframe(df) + print(f"Pandas conversion: {table_from_pandas.schema}") + print(f"Rows: {len(table_from_pandas)}") + +except ImportError: + print("Pandas not available for interchange demo") +except Exception as e: + print(f"Pandas interchange failed: {e}") +``` + +### JVM Integration + +```python +import pyarrow as pa + +# JVM integration (conceptual example) +try: + # Set JVM path (platform-specific) + import platform + if platform.system() == "Linux": + jvm_path = "/usr/lib/jvm/default/lib/server/libjvm.so" + elif platform.system() == "Darwin": # macOS + jvm_path = "/Library/Java/JavaVirtualMachines/*/Contents/Home/lib/server/libjvm.dylib" + elif platform.system() == "Windows": + jvm_path = "C:\\Program Files\\Java\\*\\bin\\server\\jvm.dll" + else: + jvm_path = None + + if jvm_path: + pa.set_default_jvm_path(jvm_path) + current_path = pa.get_default_jvm_path() + print(f"JVM path set to: {current_path}") + + # Set JVM options + jvm_options = [ + "-Xmx1g", # Maximum heap size + "-XX:+UseG1GC", # Use G1 garbage collector + "-Djava.awt.headless=true" # Headless mode + ] + pa.set_default_jvm_options(jvm_options) + current_options = pa.get_default_jvm_options() + print(f"JVM options: {current_options}") + +except AttributeError: + print("JVM integration functions not available") +``` + +### Performance Monitoring and Configuration + +```python +import pyarrow as pa +import time + +# System information +print("=== PyArrow System Information ===") +pa.show_versions() +print() + +print("=== Runtime Information ===") +pa.show_info() +print() + +# CPU configuration +original_cpu_count = pa.cpu_count() +print(f"Original CPU count: {original_cpu_count}") + +# Set lower CPU count for testing +pa.set_cpu_count(max(1, original_cpu_count // 2)) +print(f"Reduced CPU count: {pa.cpu_count()}") + +# I/O thread configuration +original_io_threads = pa.io_thread_count() +print(f"Original I/O threads: {original_io_threads}") + +pa.set_io_thread_count(4) +print(f"Set I/O threads: {pa.io_thread_count()}") + +# Memory monitoring +initial_memory = pa.total_allocated_bytes() +print(f"Initial memory: {initial_memory} bytes") + +# Create some data to test memory tracking +large_arrays = [] +for i in range(5): + arr = pa.array(range(100000)) + large_arrays.append(arr) + +peak_memory = pa.total_allocated_bytes() +print(f"Peak memory: {peak_memory} bytes") +print(f"Memory increase: {peak_memory - initial_memory} bytes") + +# Clear arrays +large_arrays.clear() +import gc +gc.collect() + +final_memory = pa.total_allocated_bytes() +print(f"Final memory: {final_memory} bytes") + +# Restore original settings +pa.set_cpu_count(original_cpu_count) +pa.set_io_thread_count(original_io_threads) +print(f"Restored CPU count: {pa.cpu_count()}") +print(f"Restored I/O threads: {pa.io_thread_count()}") + +# Signal handling 
+pa.enable_signal_handlers(True) +print("Signal handlers enabled") + +# Library information for development +print(f"Include directory: {pa.get_include()}") +print(f"Libraries: {pa.get_libraries()}") +print(f"Library directories: {pa.get_library_dirs()[:3]}...") # First 3 +``` + +### Advanced Data Processing Pipeline + +```python +import pyarrow as pa +import pyarrow.compute as pc +import pyarrow.dataset as ds +import tempfile +import os + +def advanced_processing_pipeline(): + """Demonstrate advanced PyArrow features in a processing pipeline.""" + + # Create sample data with complex types + data = pa.table({ + 'id': range(1000), + 'timestamp': pa.array([ + f'2023-{(i % 12) + 1:02d}-{(i % 28) + 1:02d} {(i % 24):02d}:00:00' + for i in range(1000) + ], type=pa.timestamp('s')), + 'values': [ + [float(j) for j in range(i % 5 + 1)] + for i in range(1000) + ], + 'metadata': [ + {'source': f'sensor_{i % 10}', 'quality': (i % 100) / 100.0} + for i in range(1000) + ] + }) + + with tempfile.TemporaryDirectory() as tmpdir: + # Write partitioned dataset + ds.write_dataset( + data, + tmpdir, + format='parquet', + partitioning=['id'], # Partition by id ranges + max_rows_per_file=100, + compression='snappy' + ) + + # Read as dataset + dataset = ds.dataset(tmpdir, format='parquet') + + print(f"Dataset schema: {dataset.schema}") + print(f"Dataset files: {len(list(dataset.get_fragments()))}") + + # Complex filtering and computation + # Filter by timestamp and compute statistics on nested data + filtered_data = dataset.to_table( + filter=( + pc.greater(pc.field('timestamp'), + pc.strptime(['2023-06-01'], format='%Y-%m-%d', unit='s')[0]) & + pc.less(pc.field('timestamp'), + pc.strptime(['2023-09-01'], format='%Y-%m-%d', unit='s')[0]) + ), + columns=['id', 'timestamp', 'values'] + ) + + print(f"Filtered data: {len(filtered_data)} rows") + + # Compute statistics on list column + list_lengths = pc.list_size(filtered_data['values']) + avg_list_length = pc.mean(list_lengths) + + print(f"Average list length: {avg_list_length}") + + # Flatten list column and compute aggregate + flattened_values = pc.list_flatten(filtered_data['values']) + total_sum = pc.sum(flattened_values) + + print(f"Sum of all flattened values: {total_sum}") + + return filtered_data + +# Run advanced pipeline +try: + result = advanced_processing_pipeline() + print(f"Pipeline completed successfully: {len(result)} rows processed") +except Exception as e: + print(f"Pipeline error: {e}") +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-pyarrow/docs/arrow-flight.md b/.tessl/tiles/tessl/pypi-pyarrow/docs/arrow-flight.md new file mode 100644 index 0000000..6de0cdc --- /dev/null +++ b/.tessl/tiles/tessl/pypi-pyarrow/docs/arrow-flight.md @@ -0,0 +1,1169 @@ +# Arrow Flight RPC + +High-performance RPC framework for distributed data services. Provides client-server architecture for streaming large datasets with authentication, metadata handling, custom middleware support, and efficient data transfer over networks. + +## Capabilities + +### Client Operations + +Connect to Flight services and perform data operations with high-performance streaming. + +```python { .api } +def connect(location, tls_certificates=None, cert_chain=None, private_key=None, auth_handler=None, call_options=None, tls_root_certs=None, tls_override_hostname=None, middleware=None, write_size_limit_bytes=None, disable_server_verification=False): + """ + Connect to Flight service. 
+ + Parameters: + - location: str or Location, service location + - tls_certificates: list, TLS certificates + - cert_chain: bytes, certificate chain for mTLS + - private_key: bytes, private key for mTLS + - auth_handler: ClientAuthHandler, authentication handler + - call_options: FlightCallOptions, default call options + - tls_root_certs: bytes, root certificates for TLS + - tls_override_hostname: str, override hostname for TLS + - middleware: list, client middleware + - write_size_limit_bytes: int, write size limit + - disable_server_verification: bool, disable server verification + + Returns: + FlightClient: Connected Flight client + """ + +class FlightClient: + """ + Flight client for connecting to Flight services. + """ + + def authenticate(self, auth_handler, options=None): + """ + Authenticate with server. + + Parameters: + - auth_handler: ClientAuthHandler, authentication handler + - options: FlightCallOptions, call options + """ + + def list_flights(self, criteria=None, options=None): + """ + List available flights. + + Parameters: + - criteria: bytes, listing criteria + - options: FlightCallOptions, call options + + Returns: + iterator: Iterator of FlightInfo objects + """ + + def get_flight_info(self, descriptor, options=None): + """ + Get flight information. + + Parameters: + - descriptor: FlightDescriptor, flight descriptor + - options: FlightCallOptions, call options + + Returns: + FlightInfo: Flight information + """ + + def get_schema(self, descriptor, options=None): + """ + Get flight schema. + + Parameters: + - descriptor: FlightDescriptor, flight descriptor + - options: FlightCallOptions, call options + + Returns: + Schema: Flight schema + """ + + def do_get(self, ticket, options=None): + """ + Retrieve data stream. + + Parameters: + - ticket: Ticket, data ticket + - options: FlightCallOptions, call options + + Returns: + FlightStreamReader: Data stream reader + """ + + def do_put(self, descriptor, schema, options=None): + """ + Send data stream. + + Parameters: + - descriptor: FlightDescriptor, flight descriptor + - schema: Schema, data schema + - options: FlightCallOptions, call options + + Returns: + FlightStreamWriter: Data stream writer + """ + + def do_exchange(self, descriptor, schema, options=None): + """ + Bidirectional data exchange. + + Parameters: + - descriptor: FlightDescriptor, flight descriptor + - schema: Schema, data schema + - options: FlightCallOptions, call options + + Returns: + FlightStreamWriter: Exchange stream writer + """ + + def list_actions(self, options=None): + """ + List available actions. + + Parameters: + - options: FlightCallOptions, call options + + Returns: + iterator: Iterator of ActionType objects + """ + + def do_action(self, action, options=None): + """ + Execute action. + + Parameters: + - action: Action, action to execute + - options: FlightCallOptions, call options + + Returns: + iterator: Iterator of Result objects + """ + + def close(self): + """Close client connection.""" + +class Location: + """ + Flight service location. + + Attributes: + - uri: Location URI + """ + + @classmethod + def for_grpc_tcp(cls, host, port): + """Create TCP location.""" + + @classmethod + def for_grpc_tls(cls, host, port): + """Create TLS location.""" + + @classmethod + def for_grpc_unix(cls, path): + """Create Unix socket location.""" + + def __str__(self): ... + def __eq__(self, other): ... +``` + +### Server Implementation + +Base classes and interfaces for implementing Flight servers. 
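+
+A minimal end-to-end sketch of the typical subclass-and-serve pattern. The base-class constructor taking a location and the `serve()`/`port` members are assumed from upstream Flight examples and are not part of the listing below:
+
+```python
+import pyarrow as pa
+import pyarrow.flight as flight
+
+class EchoServer(flight.FlightServerBase):
+    """Returns a fixed table for any ticket."""
+
+    def do_get(self, context, ticket):
+        table = pa.table({"answer": [42]})
+        return flight.RecordBatchStream(table)
+
+if __name__ == "__main__":
+    # Assumed pattern: pass a location to the base class and block in serve()
+    server = EchoServer("grpc://0.0.0.0:8815")
+    print(f"Listening on port {server.port}")
+    server.serve()
+```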
+ +```python { .api } +class FlightServerBase: + """ + Base class for implementing Flight servers. + """ + + def list_flights(self, context, criteria): + """ + List available flights. + + Parameters: + - context: ServerCallContext, call context + - criteria: bytes, listing criteria + + Returns: + iterator: Iterator of FlightInfo objects + """ + raise NotImplementedError + + def get_flight_info(self, context, descriptor): + """ + Get flight information. + + Parameters: + - context: ServerCallContext, call context + - descriptor: FlightDescriptor, flight descriptor + + Returns: + FlightInfo: Flight information + """ + raise NotImplementedError + + def get_schema(self, context, descriptor): + """ + Get flight schema. + + Parameters: + - context: ServerCallContext, call context + - descriptor: FlightDescriptor, flight descriptor + + Returns: + SchemaResult: Schema result + """ + raise NotImplementedError + + def do_get(self, context, ticket): + """ + Handle data retrieval. + + Parameters: + - context: ServerCallContext, call context + - ticket: Ticket, data ticket + + Returns: + FlightDataStream: Data stream + """ + raise NotImplementedError + + def do_put(self, context, descriptor, reader, writer): + """ + Handle data upload. + + Parameters: + - context: ServerCallContext, call context + - descriptor: FlightDescriptor, flight descriptor + - reader: FlightStreamReader, data stream reader + - writer: FlightMetadataWriter, metadata writer + """ + raise NotImplementedError + + def do_exchange(self, context, descriptor, reader, writer): + """ + Handle bidirectional data exchange. + + Parameters: + - context: ServerCallContext, call context + - descriptor: FlightDescriptor, flight descriptor + - reader: FlightStreamReader, data stream reader + - writer: FlightStreamWriter, data stream writer + """ + raise NotImplementedError + + def list_actions(self, context): + """ + List available actions. + + Parameters: + - context: ServerCallContext, call context + + Returns: + iterator: Iterator of ActionType objects + """ + return [] + + def do_action(self, context, action): + """ + Execute action. + + Parameters: + - context: ServerCallContext, call context + - action: Action, action to execute + + Returns: + iterator: Iterator of Result objects + """ + raise NotImplementedError + +class ServerCallContext: + """ + Server call context. + + Attributes: + - peer_identity: Client identity + - peer: Client peer information + - method: Called method + """ + + def is_cancelled(self): + """Check if call is cancelled.""" + + def add_header(self, key, value): + """Add response header.""" + + def add_trailer(self, key, value): + """Add response trailer.""" +``` + +### Data Streaming + +Classes for handling data streams in Flight operations with efficient serialization. + +```python { .api } +class FlightDataStream: + """Base class for Flight data streams.""" + + def schema(self): + """Get stream schema.""" + raise NotImplementedError + + def __iter__(self): + """Iterate over stream chunks.""" + raise NotImplementedError + +class FlightStreamReader: + """ + Flight stream reader. + + Attributes: + - schema: Stream schema + """ + + def __iter__(self): ... + + def read_next(self): + """Read next chunk.""" + + def read_chunk(self): + """Read chunk with metadata.""" + + def read_all(self): + """Read all data as table.""" + + def read_pandas(self): + """Read all data as pandas DataFrame.""" + +class FlightStreamWriter: + """ + Flight stream writer. 
+ + Attributes: + - schema: Stream schema + """ + + def write_batch(self, batch): + """Write record batch.""" + + def write_table(self, table, max_chunksize=None): + """Write table.""" + + def write_with_metadata(self, batch, app_metadata=None): + """Write batch with metadata.""" + + def done_writing(self): + """Signal end of writing.""" + + def close(self): + """Close writer.""" + +class FlightStreamChunk: + """ + Flight stream chunk. + + Attributes: + - data: Record batch data + - app_metadata: Application metadata + """ + +class RecordBatchStream(FlightDataStream): + """Record batch-based Flight stream.""" + + def __init__(self, data_source): ... + +class GeneratorStream(FlightDataStream): + """Generator-based Flight stream.""" + + def __init__(self, schema, generator): ... +``` + +### Descriptors and Information + +Flight descriptors and metadata for identifying and describing data streams. + +```python { .api } +class FlightDescriptor: + """ + Flight descriptor for identifying data streams. + + Attributes: + - descriptor_type: Descriptor type + - command: Command bytes (for COMMAND type) + - path: Path components (for PATH type) + """ + + @classmethod + def for_command(cls, command): + """ + Create command descriptor. + + Parameters: + - command: bytes, command data + + Returns: + FlightDescriptor: Command descriptor + """ + + @classmethod + def for_path(cls, *path): + """ + Create path descriptor. + + Parameters: + - path: str components, path components + + Returns: + FlightDescriptor: Path descriptor + """ + + def __eq__(self, other): ... + def __hash__(self): ... + +class DescriptorType: + """Descriptor type enumeration.""" + UNKNOWN = ... + PATH = ... + CMD = ... + +class FlightInfo: + """ + Flight information. + + Attributes: + - descriptor: Flight descriptor + - endpoints: List of flight endpoints + - total_records: Total number of records + - total_bytes: Total bytes + - schema: Flight schema + - ordered: Whether data is ordered + """ + + @classmethod + def for_table(cls, table, descriptor, endpoints=None): + """Create FlightInfo for table.""" + +class FlightEndpoint: + """ + Flight endpoint. + + Attributes: + - ticket: Data ticket + - locations: List of locations + """ + + def __eq__(self, other): ... + +class Ticket: + """ + Flight ticket for data retrieval. + + Attributes: + - ticket: Ticket bytes + """ + + def __eq__(self, other): ... + +class SchemaResult: + """ + Schema result. + + Attributes: + - schema: Arrow schema + """ +``` + +### Authentication + +Authentication handlers for client and server authentication. + +```python { .api } +class BasicAuth: + """ + Basic username/password authentication. + """ + + def __init__(self, username, password): ... + + @property + def username(self): ... + + @property + def password(self): ... + +class ClientAuthHandler: + """Client-side authentication handler.""" + + def authenticate(self, outgoing, incoming): + """ + Authenticate client. + + Parameters: + - outgoing: outgoing metadata + - incoming: incoming metadata + """ + raise NotImplementedError + + def get_token(self): + """Get authentication token.""" + return None + +class ServerAuthHandler: + """Server-side authentication handler.""" + + def authenticate(self, outgoing, incoming): + """ + Authenticate request. + + Parameters: + - outgoing: outgoing metadata + - incoming: incoming metadata + + Returns: + str: Client identity + """ + raise NotImplementedError + + def is_valid(self, token): + """ + Validate authentication token. 
+ + Parameters: + - token: str, authentication token + + Returns: + str: Client identity if valid + """ + raise NotImplementedError +``` + +### Middleware + +Middleware system for intercepting and modifying Flight calls. + +```python { .api } +class ClientMiddleware: + """Client-side middleware interface.""" + + def sending_headers(self): + """Called when sending headers.""" + pass + + def received_headers(self, headers): + """Called when receiving headers.""" + pass + + def received_trailers(self, trailers): + """Called when receiving trailers.""" + pass + +class ClientMiddlewareFactory: + """Factory for client middleware.""" + + def start_call(self, info): + """ + Start middleware for call. + + Parameters: + - info: CallInfo, call information + + Returns: + ClientMiddleware: Middleware instance + """ + raise NotImplementedError + +class ServerMiddleware: + """Server-side middleware interface.""" + + def sending_headers(self): + """Called when sending headers.""" + pass + + def call_completed(self, exception): + """Called when call completes.""" + pass + +class ServerMiddlewareFactory: + """Factory for server middleware.""" + + def start_call(self, info, headers): + """ + Start middleware for call. + + Parameters: + - info: CallInfo, call information + - headers: dict, request headers + + Returns: + ServerMiddleware: Middleware instance + """ + raise NotImplementedError + +class TracingServerMiddlewareFactory(ServerMiddlewareFactory): + """Built-in tracing middleware factory.""" + +class CallInfo: + """ + Call information. + + Attributes: + - method: Flight method + """ + +class FlightMethod: + """Flight RPC method enumeration.""" + LIST_FLIGHTS = ... + GET_FLIGHT_INFO = ... + GET_SCHEMA = ... + DO_GET = ... + DO_PUT = ... + DO_EXCHANGE = ... + LIST_ACTIONS = ... + DO_ACTION = ... +``` + +### Actions and Results + +Custom actions and results for extending Flight functionality. + +```python { .api } +class Action: + """ + Flight action request. + + Attributes: + - type: Action type + - body: Action body bytes + """ + + def __eq__(self, other): ... + +class ActionType: + """ + Flight action type information. + + Attributes: + - type: Action type string + - description: Action description + """ + + def __eq__(self, other): ... + +class Result: + """ + Flight action result. + + Attributes: + - body: Result body bytes + """ + + def __eq__(self, other): ... +``` + +### Metadata and Options + +Configuration options and metadata handling for Flight operations. + +```python { .api } +class FlightCallOptions: + """ + Options for Flight calls. + + Attributes: + - headers: Request headers + - timeout: Call timeout + """ + + def __init__(self, headers=None, timeout=None): ... + +class FlightMetadataReader: + """Flight metadata reader.""" + + def read(self): + """Read metadata.""" + +class FlightMetadataWriter: + """Flight metadata writer.""" + + def write(self, metadata): + """Write metadata.""" + +class MetadataRecordBatchReader: + """Record batch reader with metadata.""" + +class MetadataRecordBatchWriter: + """Record batch writer with metadata.""" +``` + +### Security + +Security configuration including TLS certificates and encryption. + +```python { .api } +class CertKeyPair: + """ + TLS certificate and key pair. + + Attributes: + - cert: Certificate bytes + - key: Private key bytes + """ + + def __init__(self, cert, key): ... +``` + +### Exceptions + +Flight-specific exceptions for error handling. 
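+
+A short sketch of catching these errors around a client call (the endpoint and path are placeholders):
+
+```python
+import pyarrow.flight as flight
+
+client = flight.connect("grpc://localhost:8080")
+try:
+    descriptor = flight.FlightDescriptor.for_path("missing", "table")
+    info = client.get_flight_info(descriptor)
+except flight.FlightUnavailableError as exc:
+    print(f"Service or dataset unavailable: {exc}")
+except flight.FlightTimedOutError:
+    print("Call timed out")
+except flight.FlightError as exc:
+    print(f"Other Flight error: {exc}")
+finally:
+    client.close()
+```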
+ +```python { .api } +class FlightError(Exception): + """Base Flight exception.""" + +class FlightInternalError(FlightError): + """Internal Flight error.""" + +class FlightTimedOutError(FlightError): + """Flight timeout error.""" + +class FlightCancelledError(FlightError): + """Flight cancellation error.""" + +class FlightUnauthenticatedError(FlightError): + """Authentication required error.""" + +class FlightUnauthorizedError(FlightError): + """Authorization failed error.""" + +class FlightUnavailableError(FlightError): + """Service unavailable error.""" + +class FlightServerError(FlightError): + """Server-side error.""" + +class FlightWriteSizeExceededError(FlightError): + """Write size limit exceeded error.""" +``` + +## Usage Examples + +### Basic Client Usage + +```python +import pyarrow as pa +import pyarrow.flight as flight + +# Connect to Flight server +client = flight.connect("grpc://localhost:8080") + +# List available flights +for flight_info in client.list_flights(): + print(f"Flight: {flight_info.descriptor}") + print(f" Records: {flight_info.total_records}") + print(f" Bytes: {flight_info.total_bytes}") + print(f" Schema: {flight_info.schema}") + +# Get specific flight info +descriptor = flight.FlightDescriptor.for_path("dataset", "table1") +info = client.get_flight_info(descriptor) +print(f"Flight info: {info}") + +# Get data +for endpoint in info.endpoints: + stream_reader = client.do_get(endpoint.ticket) + table = stream_reader.read_all() + print(f"Retrieved table: {len(table)} rows, {len(table.columns)} columns") + +# Upload data +upload_descriptor = flight.FlightDescriptor.for_path("uploads", "new_data") +table_to_upload = pa.table({ + 'id': [1, 2, 3, 4, 5], + 'value': [10.5, 20.3, 30.1, 40.7, 50.2] +}) + +writer, metadata_reader = client.do_put(upload_descriptor, table_to_upload.schema) +writer.write_table(table_to_upload) +writer.close() + +# Execute action +action = flight.Action("list_tables", b"") +results = client.do_action(action) +for result in results: + print(f"Action result: {result.body}") + +client.close() +``` + +### Server Implementation + +```python +import pyarrow as pa +import pyarrow.flight as flight +import threading + +class DataFlightServer(flight.FlightServerBase): + """Example Flight server implementation.""" + + def __init__(self): + super().__init__() + self.data_store = {} + self.lock = threading.Lock() + + # Initialize with sample data + self.data_store["dataset/sales"] = pa.table({ + 'date': ['2023-01-01', '2023-01-02', '2023-01-03'], + 'amount': [100.0, 150.0, 200.0], + 'region': ['North', 'South', 'East'] + }) + + self.data_store["dataset/products"] = pa.table({ + 'id': [1, 2, 3], + 'name': ['Widget A', 'Widget B', 'Widget C'], + 'price': [10.99, 15.99, 20.99] + }) + + def list_flights(self, context, criteria): + """List available flights.""" + with self.lock: + for path, table in self.data_store.items(): + descriptor = flight.FlightDescriptor.for_path(*path.split('/')) + endpoints = [flight.FlightEndpoint( + flight.Ticket(path.encode()), + ["grpc://localhost:8080"] + )] + yield flight.FlightInfo.for_table(table, descriptor, endpoints) + + def get_flight_info(self, context, descriptor): + """Get flight information.""" + path = '/'.join(descriptor.path) + + with self.lock: + if path not in self.data_store: + raise flight.FlightUnavailableError(f"Unknown path: {path}") + + table = self.data_store[path] + endpoints = [flight.FlightEndpoint( + flight.Ticket(path.encode()), + ["grpc://localhost:8080"] + )] + return 
flight.FlightInfo.for_table(table, descriptor, endpoints) + + def get_schema(self, context, descriptor): + """Get flight schema.""" + path = '/'.join(descriptor.path) + + with self.lock: + if path not in self.data_store: + raise flight.FlightUnavailableError(f"Unknown path: {path}") + + table = self.data_store[path] + return flight.SchemaResult(table.schema) + + def do_get(self, context, ticket): + """Retrieve data stream.""" + path = ticket.ticket.decode() + + with self.lock: + if path not in self.data_store: + raise flight.FlightUnavailableError(f"Unknown ticket: {path}") + + table = self.data_store[path] + return flight.RecordBatchStream(table) + + def do_put(self, context, descriptor, reader, writer): + """Handle data upload.""" + path = '/'.join(descriptor.path) + + # Read all data + table = reader.read_all() + + with self.lock: + self.data_store[path] = table + + print(f"Stored table at {path}: {len(table)} rows") + + def list_actions(self, context): + """List available actions.""" + return [ + flight.ActionType("list_tables", "List all stored tables"), + flight.ActionType("get_stats", "Get server statistics") + ] + + def do_action(self, context, action): + """Execute action.""" + if action.type == "list_tables": + with self.lock: + tables = list(self.data_store.keys()) + yield flight.Result('\n'.join(tables).encode()) + + elif action.type == "get_stats": + with self.lock: + stats = { + 'table_count': len(self.data_store), + 'total_rows': sum(len(table) for table in self.data_store.values()) + } + yield flight.Result(str(stats).encode()) + + else: + raise flight.FlightUnavailableError(f"Unknown action: {action.type}") + +# Run server +if __name__ == "__main__": + server = DataFlightServer() + location = flight.Location.for_grpc_tcp("localhost", 8080) + + # Note: This is conceptual - actual server startup requires more setup + print(f"Starting server at {location}") + # server.serve(location) # Actual implementation would differ +``` + +### Authentication Example + +```python +import pyarrow.flight as flight + +class SimpleAuthHandler(flight.ServerAuthHandler): + """Simple authentication handler.""" + + def __init__(self): + self.valid_tokens = {"user123": "secret456"} + + def authenticate(self, outgoing, incoming): + """Authenticate request.""" + # Extract credentials from incoming headers + username = None + password = None + + for header in incoming: + if header[0] == b'username': + username = header[1].decode() + elif header[0] == b'password': + password = header[1].decode() + + if username in self.valid_tokens and self.valid_tokens[username] == password: + # Set authentication token + outgoing.append((b'auth-token', f'token-{username}'.encode())) + return username + else: + raise flight.FlightUnauthenticatedError("Invalid credentials") + + def is_valid(self, token): + """Validate authentication token.""" + if token.startswith('token-'): + username = token[6:] # Remove 'token-' prefix + return username if username in self.valid_tokens else None + return None + +class SimpleClientAuthHandler(flight.ClientAuthHandler): + """Simple client authentication handler.""" + + def __init__(self, username, password): + self.username = username + self.password = password + self.token = None + + def authenticate(self, outgoing, incoming): + """Authenticate client.""" + # Send credentials + outgoing.append((b'username', self.username.encode())) + outgoing.append((b'password', self.password.encode())) + + # Get token from response + for header in incoming: + if header[0] == b'auth-token': + 
self.token = header[1].decode() + break + + def get_token(self): + """Get authentication token.""" + return self.token + +# Client usage with authentication +auth_handler = SimpleClientAuthHandler("user123", "secret456") +client = flight.connect("grpc://localhost:8080", auth_handler=auth_handler) + +# Authenticate +client.authenticate(auth_handler) + +# Now use authenticated client +flights = list(client.list_flights()) +print(f"Found {len(flights)} flights") + +client.close() +``` + +### Advanced Streaming + +```python +import pyarrow as pa +import pyarrow.flight as flight +import time + +class StreamingFlightServer(flight.FlightServerBase): + """Flight server with streaming data generation.""" + + def do_get(self, context, ticket): + """Generate streaming data.""" + path = ticket.ticket.decode() + + if path == "streaming/numbers": + return self.generate_number_stream() + elif path == "streaming/time_series": + return self.generate_time_series() + else: + raise flight.FlightUnavailableError(f"Unknown streaming path: {path}") + + def generate_number_stream(self): + """Generate stream of random numbers.""" + schema = pa.schema([ + pa.field('id', pa.int64()), + pa.field('random_value', pa.float64()) + ]) + + def number_generator(): + import random + batch_size = 1000 + + for batch_num in range(10): # 10 batches + ids = list(range(batch_num * batch_size, (batch_num + 1) * batch_size)) + values = [random.random() for _ in range(batch_size)] + + batch = pa.record_batch([ids, values], schema=schema) + yield batch + + # Simulate processing delay + time.sleep(0.1) + + return flight.GeneratorStream(schema, number_generator()) + + def generate_time_series(self): + """Generate time series data.""" + schema = pa.schema([ + pa.field('timestamp', pa.timestamp('s')), + pa.field('sensor_id', pa.string()), + pa.field('value', pa.float64()) + ]) + + def time_series_generator(): + import random + from datetime import datetime, timedelta + + start_time = datetime.now() + sensors = ['sensor_001', 'sensor_002', 'sensor_003'] + + for minute in range(60): # 1 hour of data + current_time = start_time + timedelta(minutes=minute) + + timestamps = [current_time] * len(sensors) + sensor_ids = sensors + values = [random.uniform(20.0, 30.0) for _ in sensors] + + batch = pa.record_batch([timestamps, sensor_ids, values], schema=schema) + yield batch + + # Real-time simulation + time.sleep(0.05) + + return flight.GeneratorStream(schema, time_series_generator()) + +# Client streaming consumption +client = flight.connect("grpc://localhost:8080") + +# Stream processing +descriptor = flight.FlightDescriptor.for_path("streaming", "numbers") +info = client.get_flight_info(descriptor) + +for endpoint in info.endpoints: + reader = client.do_get(endpoint.ticket) + + batch_count = 0 + total_rows = 0 + + for chunk in reader: + batch = chunk.data + batch_count += 1 + total_rows += len(batch) + + print(f"Received batch {batch_count}: {len(batch)} rows") + + # Process batch + if len(batch) > 0: + avg_value = pa.compute.mean(batch['random_value']).as_py() + print(f" Average value: {avg_value:.4f}") + + print(f"Total: {batch_count} batches, {total_rows} rows") + +client.close() +``` + +### Middleware and Monitoring + +```python +import pyarrow.flight as flight +import time + +class TimingClientMiddleware(flight.ClientMiddleware): + """Client middleware for timing requests.""" + + def __init__(self): + self.start_time = None + + def sending_headers(self): + """Record start time.""" + self.start_time = time.time() + + def 
received_headers(self, headers): + """Log headers received.""" + print(f"Received headers: {dict(headers)}") + + def received_trailers(self, trailers): + """Calculate and log timing.""" + if self.start_time: + duration = time.time() - self.start_time + print(f"Request completed in {duration:.3f} seconds") + +class TimingClientMiddlewareFactory(flight.ClientMiddlewareFactory): + """Factory for timing middleware.""" + + def start_call(self, info): + """Create timing middleware for each call.""" + print(f"Starting call: {info.method}") + return TimingClientMiddleware() + +class LoggingServerMiddleware(flight.ServerMiddleware): + """Server middleware for logging requests.""" + + def __init__(self, call_info, headers): + self.call_info = call_info + self.headers = headers + self.start_time = time.time() + print(f"Request started: {call_info.method}") + print(f"Headers: {dict(headers)}") + + def call_completed(self, exception): + """Log call completion.""" + duration = time.time() - self.start_time + if exception: + print(f"Request failed after {duration:.3f}s: {exception}") + else: + print(f"Request completed in {duration:.3f}s") + +class LoggingServerMiddlewareFactory(flight.ServerMiddlewareFactory): + """Factory for logging middleware.""" + + def start_call(self, info, headers): + """Create logging middleware for each call.""" + return LoggingServerMiddleware(info, headers) + +# Client with middleware +middleware = [TimingClientMiddlewareFactory()] +client = flight.connect("grpc://localhost:8080", middleware=middleware) + +# All requests will be timed +flights = list(client.list_flights()) +print(f"Listed {len(flights)} flights") + +client.close() +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-pyarrow/docs/compute-functions.md b/.tessl/tiles/tessl/pypi-pyarrow/docs/compute-functions.md new file mode 100644 index 0000000..0244c20 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-pyarrow/docs/compute-functions.md @@ -0,0 +1,1235 @@ +# Compute Functions + +High-performance vectorized compute operations providing 200+ functions optimized for columnar data. The compute engine enables efficient mathematical operations, string processing, temporal calculations, aggregations, and filtering on Arrow arrays and tables. + +## Capabilities + +### Core Compute Infrastructure + +Function registration, execution, and expression system for building complex computational pipelines with lazy evaluation and optimization. + +```python { .api } +def call_function(name, args, options=None, memory_pool=None): + """ + Call compute function by name. + + Parameters: + - name: str, function name + - args: list, function arguments (arrays, scalars, tables) + - options: FunctionOptions, function-specific options + - memory_pool: MemoryPool, memory pool for allocations + + Returns: + Array, Scalar, or Table: Result of computation + """ + +def get_function(name): + """ + Get registered function by name. + + Parameters: + - name: str, function name + + Returns: + Function: Registered function object + """ + +def list_functions(): + """ + List all available function names. + + Returns: + list of str: Available function names + """ + +def function_registry(): + """ + Get global function registry. + + Returns: + FunctionRegistry: Global function registry + """ + +class Expression: + """ + Compute expression for lazy evaluation and optimization. 
+ """ + + def equals(self, other): + """Check equality with another expression.""" + + def to_string(self): + """String representation of expression.""" + +def field(name): + """ + Create field reference expression. + + Parameters: + - name: str, field name + + Returns: + Expression: Field reference expression + """ + +def scalar(value): + """ + Create scalar literal expression. + + Parameters: + - value: scalar value + + Returns: + Expression: Scalar literal expression + """ + +class Function: + """Base class for compute functions.""" + + @property + def name(self): + """Function name.""" + + @property + def arity(self): + """Function arity (number of arguments).""" + + @property + def doc(self): + """Function documentation.""" + +class FunctionOptions: + """Base class for function options.""" + +class FunctionRegistry: + """Registry of available compute functions.""" + + def get_function(self, name): + """Get function by name.""" + + def get_function_names(self): + """Get all function names.""" +``` + +### Mathematical Operations + +Arithmetic operations, mathematical functions, and numeric computations optimized for columnar data processing. + +```python { .api } +# Arithmetic operations +def add(x, y): + """Element-wise addition.""" + +def subtract(x, y): + """Element-wise subtraction.""" + +def multiply(x, y): + """Element-wise multiplication.""" + +def divide(x, y): + """Element-wise division.""" + +def power(base, exponent): + """Element-wise exponentiation.""" + +def negate(x): + """Element-wise negation.""" + +def abs(x): + """Element-wise absolute value.""" + +def sign(x): + """Element-wise sign (-1, 0, 1).""" + +# Mathematical functions +def sqrt(x): + """Element-wise square root.""" + +def exp(x): + """Element-wise exponential (e^x).""" + +def ln(x): + """Element-wise natural logarithm.""" + +def log10(x): + """Element-wise base-10 logarithm.""" + +def log2(x): + """Element-wise base-2 logarithm.""" + +def log1p(x): + """Element-wise log(1 + x).""" + +def floor(x): + """Element-wise floor.""" + +def ceil(x): + """Element-wise ceiling.""" + +def trunc(x): + """Element-wise truncation toward zero.""" + +def round(x, ndigits=0, round_mode='half_to_even'): + """ + Element-wise rounding. + + Parameters: + - x: Array, input array + - ndigits: int, number of decimal places + - round_mode: str, rounding mode + + Returns: + Array: Rounded array + """ + +# Trigonometric functions +def sin(x): + """Element-wise sine.""" + +def cos(x): + """Element-wise cosine.""" + +def tan(x): + """Element-wise tangent.""" + +def asin(x): + """Element-wise arcsine.""" + +def acos(x): + """Element-wise arccosine.""" + +def atan(x): + """Element-wise arctangent.""" + +def atan2(y, x): + """Element-wise arctangent of y/x.""" + +# Bitwise operations +def bit_wise_and(x, y): + """Element-wise bitwise AND.""" + +def bit_wise_or(x, y): + """Element-wise bitwise OR.""" + +def bit_wise_xor(x, y): + """Element-wise bitwise XOR.""" + +def bit_wise_not(x): + """Element-wise bitwise NOT.""" + +def shift_left(x, y): + """Element-wise left bit shift.""" + +def shift_right(x, y): + """Element-wise right bit shift.""" +``` + +### Comparison and Logical Operations + +Element-wise comparisons, logical operations, and boolean functions for filtering and conditional logic. 
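+
+For a quick orientation before the full listing, the sketch below combines comparison, logical, and null-handling kernels to build a boolean mask and apply conditional logic. It is a minimal example that assumes only a standard `pyarrow` installation; every function used here appears in the reference that follows.
+
+```python
+import pyarrow as pa
+import pyarrow.compute as pc
+
+values = pa.array([10, None, 25, 40, None, 55])
+
+# Boolean mask: non-null values strictly between 20 and 50
+mask = pc.and_(pc.greater(values, 20), pc.less(values, 50))
+
+# Conditional selection: keep in-range values, replace everything else with -1
+clipped = pc.if_else(pc.fill_null(mask, False), values, -1)
+
+print(mask.to_pylist())     # [False, None, True, True, None, False]
+print(clipped.to_pylist())  # [-1, -1, 25, 40, -1, -1]
+```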
+ +```python { .api } +# Comparison operations +def equal(x, y): + """Element-wise equality comparison.""" + +def not_equal(x, y): + """Element-wise inequality comparison.""" + +def less(x, y): + """Element-wise less than comparison.""" + +def less_equal(x, y): + """Element-wise less than or equal comparison.""" + +def greater(x, y): + """Element-wise greater than comparison.""" + +def greater_equal(x, y): + """Element-wise greater than or equal comparison.""" + +# Logical operations +def and_(x, y): + """Element-wise logical AND.""" + +def or_(x, y): + """Element-wise logical OR.""" + +def xor(x, y): + """Element-wise logical XOR.""" + +def invert(x): + """Element-wise logical NOT.""" + +# Null handling +def is_null(x): + """Check for null values.""" + +def is_valid(x): + """Check for non-null values.""" + +def is_nan(x): + """Check for NaN values (floating point).""" + +def is_finite(x): + """Check for finite values.""" + +def is_infinite(x): + """Check for infinite values.""" + +def fill_null(values, fill_value): + """Fill null values with specified value.""" + +def coalesce(*arrays): + """Return first non-null value from arrays.""" + +def choose(indices, *arrays): + """Choose values from arrays based on indices.""" + +def if_else(condition, left, right): + """Conditional selection (ternary operator).""" + +def case_when(*args): + """ + Multi-branch conditional selection. + + Parameters: + - args: alternating condition/value pairs, optional else value + + Returns: + Array: Selected values based on conditions + """ +``` + +### Aggregation Functions + +Statistical and aggregation functions for computing summary statistics and reductions over arrays and groups. + +```python { .api } +# Basic aggregations +def sum(array, skip_nulls=True, min_count=1): + """ + Sum of array elements. + + Parameters: + - array: Array, input array + - skip_nulls: bool, ignore null values + - min_count: int, minimum non-null values required + + Returns: + Scalar: Sum of elements + """ + +def mean(array, skip_nulls=True, min_count=1): + """Mean of array elements.""" + +def count(array, mode='only_valid'): + """ + Count array elements. + + Parameters: + - array: Array, input array + - mode: str, counting mode ('only_valid', 'only_null', 'all') + + Returns: + Scalar: Count of elements + """ + +def count_distinct(array, mode='only_valid'): + """Count distinct elements.""" + +def min(array, skip_nulls=True, min_count=1): + """Minimum value.""" + +def max(array, skip_nulls=True, min_count=1): + """Maximum value.""" + +def min_max(array, skip_nulls=True, min_count=1): + """ + Minimum and maximum values. + + Returns: + StructScalar: Struct with 'min' and 'max' fields + """ + +def any(array, skip_nulls=True, min_count=1): + """Logical OR reduction (any true values).""" + +def all(array, skip_nulls=True, min_count=1): + """Logical AND reduction (all true values).""" + +# Statistical functions +def variance(array, ddof=0, skip_nulls=True, min_count=1): + """ + Variance of array elements. + + Parameters: + - array: Array, input array + - ddof: int, delta degrees of freedom + - skip_nulls: bool, ignore null values + - min_count: int, minimum non-null values required + + Returns: + Scalar: Variance + """ + +def stddev(array, ddof=0, skip_nulls=True, min_count=1): + """Standard deviation.""" + +def quantile(array, q=0.5, interpolation='linear', skip_nulls=True, min_count=1): + """ + Quantile of array elements. 
+ + Parameters: + - array: Array, input array + - q: float or list, quantile(s) to compute (0.0 to 1.0) + - interpolation: str, interpolation method + - skip_nulls: bool, ignore null values + - min_count: int, minimum non-null values required + + Returns: + Scalar or Array: Quantile value(s) + """ + +def mode(array, n=1, skip_nulls=True, min_count=1): + """ + Mode (most frequent values). + + Parameters: + - array: Array, input array + - n: int, number of modes to return + - skip_nulls: bool, ignore null values + - min_count: int, minimum non-null values required + + Returns: + StructArray: Modes with counts + """ + +def tdigest(array, q=None, delta=100, buffer_size=500, skip_nulls=True, min_count=1): + """ + T-Digest quantile approximation. + + Parameters: + - array: Array, input array + - q: list of float, quantiles to compute + - delta: int, compression parameter + - buffer_size: int, buffer size + - skip_nulls: bool, ignore null values + - min_count: int, minimum non-null values required + + Returns: + Array: Approximate quantiles + """ + +# Product and cumulative operations +def product(array, skip_nulls=True, min_count=1): + """Product of array elements.""" + +def cumulative_sum(array, start=None, skip_nulls=True): + """ + Cumulative sum. + + Parameters: + - array: Array, input array + - start: scalar, starting value + - skip_nulls: bool, ignore null values + + Returns: + Array: Cumulative sums + """ + +def cumulative_sum_checked(array, start=None, skip_nulls=True): + """Cumulative sum with overflow checking.""" + +def cumulative_prod(array, start=None, skip_nulls=True): + """Cumulative product.""" + +def cumulative_max(array, skip_nulls=True): + """Cumulative maximum.""" + +def cumulative_min(array, skip_nulls=True): + """Cumulative minimum.""" +``` + +### Array Operations + +Functions for array manipulation, filtering, sorting, and selection operations. + +```python { .api } +def take(data, indices, boundscheck=True): + """ + Select elements by indices. + + Parameters: + - data: Array, input array + - indices: Array, selection indices + - boundscheck: bool, check index bounds + + Returns: + Array: Selected elements + """ + +def filter(data, selection_filter, null_selection_behavior='drop'): + """ + Filter array by boolean mask. + + Parameters: + - data: Array, input array + - selection_filter: Array, boolean selection mask + - null_selection_behavior: str, how to handle nulls in mask + + Returns: + Array: Filtered elements + """ + +def slice(array, start, stop=None, step=1): + """ + Slice array. + + Parameters: + - array: Array, input array + - start: int, start index + - stop: int, stop index (exclusive) + - step: int, step size + + Returns: + Array: Sliced array + """ + +def array_sort_indices(array, order='ascending', null_placement='at_end'): + """ + Get indices that would sort array. + + Parameters: + - array: Array, input array + - order: str, sort order ('ascending', 'descending') + - null_placement: str, null placement ('at_start', 'at_end') + + Returns: + Array: Sort indices + """ + +def sort_indices(arrays, orders=None, null_placement=None): + """ + Get indices for sorting by multiple arrays. + + Parameters: + - arrays: list of Array, sort keys + - orders: list of str, sort orders for each key + - null_placement: list of str, null placement for each key + + Returns: + Array: Sort indices + """ + +def partition_nth_indices(array, pivot, null_placement='at_end'): + """ + Partition array around nth element. 
+ + Parameters: + - array: Array, input array + - pivot: int, pivot index + - null_placement: str, null placement + + Returns: + Array: Partition indices + """ + +def top_k_unstable(array, k, sort_keys=None): + """ + Select top k elements (unstable sort). + + Parameters: + - array: Array, input array + - k: int, number of elements to select + - sort_keys: list, sort keys for selection + + Returns: + Array: Top k elements + """ + +def bottom_k_unstable(array, k, sort_keys=None): + """ + Select bottom k elements (unstable sort). + + Parameters: + - array: Array, input array + - k: int, number of elements to select + - sort_keys: list, sort keys for selection + + Returns: + Array: Bottom k elements + """ + +def unique(array): + """ + Get unique values. + + Parameters: + - array: Array, input array + + Returns: + Array: Unique values + """ + +def value_counts(array): + """ + Count occurrences of each value. + + Parameters: + - array: Array, input array + + Returns: + StructArray: Values and their counts + """ + +def dictionary_encode(array, null_encoding_behavior='mask'): + """ + Dictionary encode array. + + Parameters: + - array: Array, input array + - null_encoding_behavior: str, null handling + + Returns: + DictionaryArray: Dictionary encoded array + """ + +def run_end_encode(array): + """ + Run-end encode array. + + Parameters: + - array: Array, input array + + Returns: + RunEndEncodedArray: Run-end encoded array + """ +``` + +### String Functions + +Comprehensive string processing functions for text manipulation, pattern matching, and string transformations. + +```python { .api } +# String length and properties +def utf8_length(strings): + """UTF-8 character length of strings.""" + +def binary_length(strings): + """Byte length of binary/string arrays.""" + +def utf8_is_alnum(strings): + """Check if strings are alphanumeric.""" + +def utf8_is_alpha(strings): + """Check if strings are alphabetic.""" + +def utf8_is_decimal(strings): + """Check if strings are decimal.""" + +def utf8_is_digit(strings): + """Check if strings contain only digits.""" + +def utf8_is_lower(strings): + """Check if strings are lowercase.""" + +def utf8_is_numeric(strings): + """Check if strings are numeric.""" + +def utf8_is_printable(strings): + """Check if strings are printable.""" + +def utf8_is_space(strings): + """Check if strings are whitespace.""" + +def utf8_is_title(strings): + """Check if strings are titlecased.""" + +def utf8_is_upper(strings): + """Check if strings are uppercase.""" + +# String transformations +def utf8_upper(strings): + """Convert strings to uppercase.""" + +def utf8_lower(strings): + """Convert strings to lowercase.""" + +def utf8_swapcase(strings): + """Swap case of strings.""" + +def utf8_capitalize(strings): + """Capitalize first character.""" + +def utf8_title(strings): + """Convert to title case.""" + +def ascii_upper(strings): + """Convert ASCII strings to uppercase.""" + +def ascii_lower(strings): + """Convert ASCII strings to lowercase.""" + +def ascii_swapcase(strings): + """Swap case of ASCII strings.""" + +def ascii_capitalize(strings): + """Capitalize ASCII strings.""" + +# String padding and trimming +def utf8_ltrim(strings, characters=' '): + """ + Left trim strings. 
+ + Parameters: + - strings: Array, input strings + - characters: str, characters to trim + + Returns: + Array: Left-trimmed strings + """ + +def utf8_rtrim(strings, characters=' '): + """Right trim strings.""" + +def utf8_trim(strings, characters=' '): + """Trim strings from both ends.""" + +def utf8_ltrim_whitespace(strings): + """Left trim whitespace.""" + +def utf8_rtrim_whitespace(strings): + """Right trim whitespace.""" + +def utf8_trim_whitespace(strings): + """Trim whitespace from both ends.""" + +def utf8_center(strings, width, padding=' '): + """ + Center strings with padding. + + Parameters: + - strings: Array, input strings + - width: int, total width + - padding: str, padding character + + Returns: + Array: Centered strings + """ + +def utf8_lpad(strings, width, padding=' '): + """Left pad strings.""" + +def utf8_rpad(strings, width, padding=' '): + """Right pad strings.""" + +# String slicing and extraction +def utf8_slice_codeunits(strings, start, stop=None, step=1): + """ + Slice strings by code units. + + Parameters: + - strings: Array, input strings + - start: int, start position + - stop: int, stop position + - step: int, step size + + Returns: + Array: Sliced strings + """ + +def utf8_reverse(strings): + """Reverse strings.""" + +def utf8_replace_slice(strings, start, stop, replacement): + """ + Replace slice of strings. + + Parameters: + - strings: Array, input strings + - start: int, start position + - stop: int, stop position + - replacement: str, replacement string + + Returns: + Array: Strings with replaced slices + """ + +# String searching and matching +def match_substring(strings, pattern, ignore_case=False): + """ + Check if strings contain substring. + + Parameters: + - strings: Array, input strings + - pattern: str, substring pattern + - ignore_case: bool, case insensitive matching + + Returns: + BooleanArray: Match results + """ + +def match_substring_regex(strings, pattern, ignore_case=False): + """ + Check if strings match regex pattern. + + Parameters: + - strings: Array, input strings + - pattern: str, regex pattern + - ignore_case: bool, case insensitive matching + + Returns: + BooleanArray: Match results + """ + +def find_substring(strings, pattern, ignore_case=False): + """ + Find first occurrence of substring. + + Parameters: + - strings: Array, input strings + - pattern: str, substring pattern + - ignore_case: bool, case insensitive search + + Returns: + Int32Array: First occurrence indices (-1 if not found) + """ + +def find_substring_regex(strings, pattern, ignore_case=False): + """Find first regex match.""" + +def count_substring(strings, pattern, ignore_case=False): + """ + Count occurrences of substring. + + Parameters: + - strings: Array, input strings + - pattern: str, substring pattern + - ignore_case: bool, case insensitive counting + + Returns: + Int32Array: Occurrence counts + """ + +def count_substring_regex(strings, pattern, ignore_case=False): + """Count regex matches.""" + +# String replacement +def replace_substring(strings, pattern, replacement, max_replacements=-1): + """ + Replace substring occurrences. + + Parameters: + - strings: Array, input strings + - pattern: str, substring to replace + - replacement: str, replacement string + - max_replacements: int, maximum replacements (-1 for all) + + Returns: + Array: Strings with replacements + """ + +def replace_substring_regex(strings, pattern, replacement, max_replacements=-1): + """Replace regex matches.""" + +def extract_regex(strings, pattern): + """ + Extract regex groups. 
+ + Parameters: + - strings: Array, input strings + - pattern: str, regex pattern with groups + + Returns: + StructArray: Extracted groups + """ + +# String splitting and joining +def split_pattern(strings, pattern, max_splits=-1, reverse=False): + """ + Split strings by pattern. + + Parameters: + - strings: Array, input strings + - pattern: str, split pattern + - max_splits: int, maximum splits (-1 for unlimited) + - reverse: bool, split from right + + Returns: + ListArray: Split components + """ + +def split_pattern_regex(strings, pattern, max_splits=-1, reverse=False): + """Split strings by regex pattern.""" + +def binary_join(lists, separator): + """ + Join binary arrays with separator. + + Parameters: + - lists: ListArray, lists of binary values + - separator: bytes, join separator + + Returns: + Array: Joined binary values + """ + +def binary_join_element_wise(left, right, separator): + """Element-wise binary join.""" +``` + +### Temporal Functions + +Date, time, and timestamp manipulation functions for temporal data processing and calendar operations. + +```python { .api } +# Date/time extraction +def year(timestamps): + """Extract year from timestamps.""" + +def month(timestamps): + """Extract month from timestamps.""" + +def day(timestamps): + """Extract day from timestamps.""" + +def day_of_week(timestamps, count_from_zero=True, week_start=1): + """ + Extract day of week. + + Parameters: + - timestamps: Array, timestamp array + - count_from_zero: bool, whether to count from 0 + - week_start: int, first day of week (1=Monday, 7=Sunday) + + Returns: + Int32Array: Day of week values + """ + +def day_of_year(timestamps): + """Extract day of year.""" + +def iso_week(timestamps): + """Extract ISO week number.""" + +def iso_year(timestamps): + """Extract ISO year.""" + +def quarter(timestamps): + """Extract quarter.""" + +def hour(timestamps): + """Extract hour from timestamps.""" + +def minute(timestamps): + """Extract minute from timestamps.""" + +def second(timestamps): + """Extract second from timestamps.""" + +def millisecond(timestamps): + """Extract millisecond from timestamps.""" + +def microsecond(timestamps): + """Extract microsecond from timestamps.""" + +def nanosecond(timestamps): + """Extract nanosecond from timestamps.""" + +def subsecond(timestamps): + """Extract fractional seconds.""" + +# Temporal arithmetic +def years_between(start, end): + """Calculate years between timestamps.""" + +def month_interval_between(start, end): + """Calculate month intervals between timestamps.""" + +def day_time_interval_between(start, end): + """Calculate day-time intervals between timestamps.""" + +def weeks_between(start, end): + """Calculate weeks between timestamps.""" + +def days_between(start, end): + """Calculate days between timestamps.""" + +def hours_between(start, end): + """Calculate hours between timestamps.""" + +def minutes_between(start, end): + """Calculate minutes between timestamps.""" + +def seconds_between(start, end): + """Calculate seconds between timestamps.""" + +def milliseconds_between(start, end): + """Calculate milliseconds between timestamps.""" + +def microseconds_between(start, end): + """Calculate microseconds between timestamps.""" + +def nanoseconds_between(start, end): + """Calculate nanoseconds between timestamps.""" + +# Temporal rounding and truncation +def floor_temporal(timestamps, unit='day', week_starts_monday=True, ceil_is_strictly_greater=False, calendar_based_origin=False): + """ + Floor timestamps to temporal unit. 
+ + Parameters: + - timestamps: Array, timestamp array + - unit: str, temporal unit ('year', 'month', 'day', 'hour', etc.) + - week_starts_monday: bool, week start day + - ceil_is_strictly_greater: bool, ceiling behavior + - calendar_based_origin: bool, use calendar-based origin + + Returns: + Array: Floored timestamps + """ + +def ceil_temporal(timestamps, unit='day', week_starts_monday=True, ceil_is_strictly_greater=False, calendar_based_origin=False): + """Ceil timestamps to temporal unit.""" + +def round_temporal(timestamps, unit='day', week_starts_monday=True, ceil_is_strictly_greater=False, calendar_based_origin=False): + """Round timestamps to temporal unit.""" + +# String parsing and formatting +def strftime(timestamps, format='%Y-%m-%d %H:%M:%S', locale='C'): + """ + Format timestamps as strings. + + Parameters: + - timestamps: Array, timestamp array + - format: str, strftime format string + - locale: str, locale for formatting + + Returns: + StringArray: Formatted timestamp strings + """ + +def strptime(strings, format, unit, error_is_null=False): + """ + Parse strings as timestamps. + + Parameters: + - strings: Array, string array + - format: str, strptime format string + - unit: str, timestamp unit + - error_is_null: bool, return null on parse errors + + Returns: + TimestampArray: Parsed timestamps + """ + +# Timezone operations +def assume_timezone(timestamps, timezone, ambiguous='raise', nonexistent='raise'): + """ + Assume timezone for naive timestamps. + + Parameters: + - timestamps: Array, naive timestamp array + - timezone: str, timezone identifier + - ambiguous: str, how to handle ambiguous times + - nonexistent: str, how to handle nonexistent times + + Returns: + TimestampArray: Timezone-aware timestamps + """ + +def local_timestamp(timestamps): + """Convert to local timezone.""" +``` + +### Type Conversion Functions + +Functions for casting and converting between different Arrow data types with configurable safety and behavior options. + +```python { .api } +def cast(array, target_type, safe=True, options=None): + """ + Cast array to different type. + + Parameters: + - array: Array, input array + - target_type: DataType, target type + - safe: bool, check for data loss + - options: CastOptions, casting options + + Returns: + Array: Cast array + """ + +def can_cast(from_type, to_type): + """ + Check if type can be cast. + + Parameters: + - from_type: DataType, source type + - to_type: DataType, target type + + Returns: + bool: Whether cast is supported + """ + +class CastOptions: + """ + Options for type casting. + + Attributes: + - safe: Whether to check for data loss + - allow_int_overflow: Allow integer overflow + - allow_time_truncate: Allow time truncation + - allow_time_overflow: Allow time overflow + - allow_decimal_truncate: Allow decimal truncation + - allow_float_truncate: Allow float truncation + """ +``` + +### Random Number Generation + +Functions for generating random numbers and sampling from distributions. + +```python { .api } +def random(n, initializer=None, options=None): + """ + Generate random numbers. + + Parameters: + - n: int, number of random values + - initializer: int, random seed + - options: RandomOptions, generation options + + Returns: + Array: Random values + """ + +class RandomOptions: + """ + Options for random number generation. 
+ + Attributes: + - initializer: Random seed + - distribution: Distribution type + """ +``` + +## Usage Examples + +### Basic Computations + +```python +import pyarrow as pa +import pyarrow.compute as pc + +# Create sample data +numbers = pa.array([1, 2, 3, 4, 5, None, 7, 8, 9, 10]) +strings = pa.array(['apple', 'banana', 'cherry', None, 'date']) + +# Arithmetic operations +doubled = pc.multiply(numbers, 2) +sum_result = pc.sum(numbers) +mean_result = pc.mean(numbers) + +# String operations +lengths = pc.utf8_length(strings) +upper_strings = pc.utf8_upper(strings) +contains_a = pc.match_substring(strings, 'a') + +# Filtering and selection +filtered = pc.filter(numbers, pc.greater(numbers, 5)) +top_3 = pc.top_k_unstable(numbers, 3) +``` + +### Table Operations + +```python +import pyarrow as pa +import pyarrow.compute as pc + +# Create table +table = pa.table({ + 'id': [1, 2, 3, 4, 5], + 'name': ['Alice', 'Bob', 'Charlie', 'Diana', 'Eve'], + 'age': [25, 30, 35, 28, 32], + 'salary': [50000, 60000, 70000, 55000, 65000] +}) + +# Filter table +adults = table.filter(pc.greater_equal(table['age'], 30)) + +# Add computed column +table_with_bonus = table.add_column( + 'bonus', + pc.multiply(table['salary'], 0.1) +) + +# Aggregations +total_salary = pc.sum(table['salary']) +avg_age = pc.mean(table['age']) +age_stats = pc.quantile(table['age'], [0.25, 0.5, 0.75]) +``` + +### Complex Expressions + +```python +import pyarrow as pa +import pyarrow.compute as pc + +# Create table with temporal data +table = pa.table({ + 'timestamp': pa.array([ + '2023-01-15 10:30:00', + '2023-02-20 14:45:00', + '2023-03-10 09:15:00', + '2023-04-05 16:20:00' + ], type=pa.timestamp('s')), + 'value': [100, 200, 150, 300] +}) + +# Extract temporal components +table = table.add_column('year', pc.year(table['timestamp'])) +table = table.add_column('month', pc.month(table['timestamp'])) +table = table.add_column('day_of_week', pc.day_of_week(table['timestamp'])) + +# Complex filtering +high_value_weekdays = table.filter( + pc.and_( + pc.greater(table['value'], 150), + pc.less(table['day_of_week'], 5) # Monday=0 to Friday=4 + ) +) + +# Conditional expressions +table = table.add_column( + 'category', + pc.case_when( + pc.less(table['value'], 150), 'low', + pc.less(table['value'], 250), 'medium', + 'high' + ) +) +``` + +### User-Defined Functions + +```python +import pyarrow as pa +import pyarrow.compute as pc + +# Register scalar UDF +def double_and_add_one(x): + return pc.add(pc.multiply(x, 2), 1) + +pc.register_scalar_function( + double_and_add_one, + 'double_and_add_one', + doc='Double input and add one' +) + +# Use registered function +result = pc.call_function('double_and_add_one', [pa.array([1, 2, 3, 4, 5])]) +print(result) # [3, 5, 7, 9, 11] +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-pyarrow/docs/core-data-structures.md b/.tessl/tiles/tessl/pypi-pyarrow/docs/core-data-structures.md new file mode 100644 index 0000000..baf8494 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-pyarrow/docs/core-data-structures.md @@ -0,0 +1,687 @@ +# Core Data Structures + +Fundamental data containers that form the foundation of PyArrow's columnar data processing capabilities. These structures provide efficient storage and manipulation of typed data in memory-optimized columnar layouts. + +## Capabilities + +### Arrays + +One-dimensional sequences of values with a specific data type. Arrays are immutable and provide the basic building blocks for all other data structures in PyArrow. 
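+
+Before the reference listing, the short sketch below shows the most common ways arrays are constructed and inspected. It assumes only a standard `pyarrow` installation; everything used here is part of the API documented below.
+
+```python
+import pyarrow as pa
+
+# The type is inferred from the Python values; None becomes a null
+ints = pa.array([1, 2, None, 4])
+print(ints.type, len(ints), ints.null_count)  # int64 4 1
+
+# An explicit type overrides inference
+floats = pa.array([1, 2, 3], type=pa.float32())
+
+# Arrays are immutable; operations return new arrays
+print(ints.slice(1, 2).to_pylist())  # [2, None]
+print(floats.to_pylist())            # [1.0, 2.0, 3.0]
+```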
+ +```python { .api } +def array(obj, type=None, mask=None, size=None, from_pandas=None, safe=True): + """ + Create Arrow array from Python sequence, NumPy array, or pandas data. + + Parameters: + - obj: sequence, NumPy array, or pandas Series to convert + - type: DataType, explicit type for the array + - mask: array-like, boolean mask for null values + - size: int, length of array if obj is scalar + - from_pandas: bool, interpret pandas-specific data + - safe: bool, check for overflow/truncation during conversion + + Returns: + Array: Arrow array with specified type + """ + +def chunked_array(arrays, type=None): + """ + Create chunked array from list of arrays. + + Parameters: + - arrays: sequence of Array objects + - type: DataType, explicit type (must match all arrays) + + Returns: + ChunkedArray: Chunked array composed of input arrays + """ + +def nulls(size, type=None): + """ + Create array of null values. + + Parameters: + - size: int, length of array + - type: DataType, type of nulls (default: null type) + + Returns: + Array: Array of null values + """ + +def repeat(value, size): + """ + Create array by repeating a single value. + + Parameters: + - value: scalar value to repeat + - size: int, number of repetitions + + Returns: + Array: Array with repeated value + """ + +def arange(start, stop=None, step=1, dtype=None): + """ + Create array with range of values. + + Parameters: + - start: int, start value (or stop if stop is None) + - stop: int, stop value (exclusive) + - step: int, step size + - dtype: DataType, array data type + + Returns: + Array: Array with range values + """ + +class Array: + """ + Base class for all Arrow arrays. + + Attributes: + - type: DataType of the array + - length: Number of elements + - null_count: Number of null values + - is_valid: Boolean array indicating non-null values + """ + + def __len__(self): ... + def __getitem__(self, key): ... + def __iter__(self): ... + + def to_pylist(self): + """Convert to Python list.""" + + def to_pandas(self, **kwargs): + """Convert to pandas Series.""" + + def to_numpy(self, **kwargs): + """Convert to NumPy array.""" + + def slice(self, offset=0, length=None): + """Return slice of array.""" + + def take(self, indices): + """Select elements by indices.""" + + def filter(self, mask): + """Filter array by boolean mask.""" + + def sort(self, **kwargs): + """Return sorted array.""" + + def unique(self): + """Return array of unique values.""" + + def value_counts(self): + """Return struct array of value counts.""" + +class ChunkedArray: + """ + Array composed of multiple contiguous arrays (chunks). + + Attributes: + - type: DataType of the chunked array + - length: Total number of elements across chunks + - null_count: Total number of null values + - num_chunks: Number of chunks + - chunks: List of Array chunks + """ + + def __len__(self): ... + def __getitem__(self, key): ... + def __iter__(self): ... + + def chunk(self, i): + """Get chunk at index i.""" + + def to_pylist(self): + """Convert to Python list.""" + + def to_pandas(self, **kwargs): + """Convert to pandas Series.""" + + def slice(self, offset=0, length=None): + """Return slice of chunked array.""" + + def take(self, indices): + """Select elements by indices.""" + + def filter(self, mask): + """Filter by boolean mask.""" + + def combine_chunks(self): + """Combine chunks into single array.""" +``` + +### Tables + +Two-dimensional datasets with named columns, similar to SQL tables or pandas DataFrames. 
Tables provide the primary interface for working with tabular data in PyArrow. + +```python { .api } +def table(data, schema=None, metadata=None, columns=None): + """ + Create Arrow table from various data sources. + + Parameters: + - data: dict, list of arrays, pandas DataFrame, or RecordBatch + - schema: Schema, explicit schema for the table + - metadata: dict, key-value metadata + - columns: list of str, column names (when data is list) + + Returns: + Table: Arrow table with specified schema + """ + +def record_batch(data, schema=None, metadata=None): + """ + Create RecordBatch from data. + + Parameters: + - data: dict, list of arrays, or sequence + - schema: Schema, explicit schema + - metadata: dict, key-value metadata + + Returns: + RecordBatch: Single batch of columnar data + """ + +def concat_tables(tables, promote=False): + """ + Concatenate tables vertically. + + Parameters: + - tables: sequence of Table objects + - promote: bool, promote schemas to compatible type + + Returns: + Table: Concatenated table + """ + +def concat_arrays(arrays): + """ + Concatenate arrays into single array. + + Parameters: + - arrays: sequence of Array objects with same type + + Returns: + Array: Concatenated array + """ + +def concat_batches(batches, promote=False): + """ + Concatenate record batches. + + Parameters: + - batches: sequence of RecordBatch objects + - promote: bool, promote schemas to compatible type + + Returns: + Table: Table created from concatenated batches + """ + +class Table: + """ + Two-dimensional table of columnar data. + + Attributes: + - schema: Schema of the table + - num_columns: Number of columns + - num_rows: Number of rows + - column_names: List of column names + - columns: List of ChunkedArray columns + """ + + def __len__(self): ... + def __getitem__(self, key): ... + def __iter__(self): ... + + def column(self, i): + """Get column by index or name.""" + + def select(self, columns): + """Select subset of columns.""" + + def slice(self, offset=0, length=None): + """Return slice of table.""" + + def filter(self, mask): + """Filter rows by boolean mask.""" + + def take(self, indices): + """Select rows by indices.""" + + def sort_by(self, sorting): + """Sort table by columns.""" + + def group_by(self, keys): + """Group table by columns.""" + + def join(self, right_table, **kwargs): + """Join with another table.""" + + def to_pandas(self, **kwargs): + """Convert to pandas DataFrame.""" + + def to_pydict(self): + """Convert to dictionary of Python lists.""" + + def to_batches(self, max_chunksize=None): + """Convert to iterator of RecordBatch objects.""" + + def add_column(self, i, field, column): + """Add column at position i.""" + + def append_column(self, field, column): + """Append column to table.""" + + def remove_column(self, i): + """Remove column at position i.""" + + def rename_columns(self, names): + """Rename columns.""" + + def drop(self, columns): + """Drop columns by name.""" + + def replace_schema_metadata(self, metadata): + """Replace table metadata.""" + +class RecordBatch: + """ + Collection of arrays with shared length representing a single batch. + + Attributes: + - schema: Schema of the batch + - num_columns: Number of columns + - num_rows: Number of rows + - column_names: List of column names + - columns: List of Array columns + """ + + def __len__(self): ... + def __getitem__(self, key): ... + def __iter__(self): ... 
+ + def column(self, i): + """Get column by index or name.""" + + def select(self, columns): + """Select subset of columns.""" + + def slice(self, offset=0, length=None): + """Return slice of batch.""" + + def filter(self, mask): + """Filter rows by boolean mask.""" + + def take(self, indices): + """Select rows by indices.""" + + def to_pandas(self, **kwargs): + """Convert to pandas DataFrame.""" + + def to_pydict(self): + """Convert to dictionary of Python lists.""" + + def add_column(self, i, field, column): + """Add column at position i.""" + + def remove_column(self, i): + """Remove column at position i.""" + + def rename_columns(self, names): + """Rename columns.""" + +class RecordBatchReader: + """ + Interface for reading stream of record batches. + """ + + def __iter__(self): ... + + def read_next_batch(self): + """Read next batch from stream.""" + + def read_all(self): + """Read all batches into table.""" + + def schema(self): + """Get schema of batches.""" + +class TableGroupBy: + """ + Grouped table operations. + """ + + def aggregate(self, aggregations): + """Perform aggregations on groups.""" +``` + +### Schemas and Fields + +Schema definitions that describe table structure, column types, and metadata. Schemas provide type safety and enable efficient data processing by defining the expected structure of tabular data. + +```python { .api } +def schema(fields, metadata=None): + """ + Create schema from list of fields. + + Parameters: + - fields: sequence of Field objects or (name, type) tuples + - metadata: dict, key-value metadata for schema + + Returns: + Schema: Schema object with specified fields + """ + +def field(name, type, nullable=True, metadata=None): + """ + Create field with name and type. + + Parameters: + - name: str, field name + - type: DataType, field data type + - nullable: bool, whether field can contain nulls + - metadata: dict, key-value metadata for field + + Returns: + Field: Field object with specified properties + """ + +def unify_schemas(schemas): + """ + Unify multiple schemas into compatible schema. + + Parameters: + - schemas: sequence of Schema objects + + Returns: + Schema: Unified schema compatible with all input schemas + """ + +class Schema: + """ + Schema defining structure of tabular data. + + Attributes: + - names: List of field names + - types: List of field types + - metadata: Key-value metadata + """ + + def __len__(self): ... + def __getitem__(self, key): ... + def __iter__(self): ... + + def field(self, i): + """Get field by index or name.""" + + def get_field_index(self, name): + """Get index of field by name.""" + + def select(self, names): + """Select subset of fields.""" + + def insert(self, i, field): + """Insert field at position i.""" + + def append(self, field): + """Append field to schema.""" + + def remove(self, i): + """Remove field at position i.""" + + def with_metadata(self, metadata): + """Return schema with new metadata.""" + + def equals(self, other, check_metadata=True): + """Check equality with another schema.""" + + def to_string(self, **kwargs): + """String representation of schema.""" + +class Field: + """ + Named field in a schema with type and metadata. 
+ + Attributes: + - name: Field name + - type: DataType of field + - nullable: Whether field can contain nulls + - metadata: Key-value metadata + """ + + def with_name(self, name): + """Return field with new name.""" + + def with_type(self, type): + """Return field with new type.""" + + def with_nullable(self, nullable): + """Return field with new nullable setting.""" + + def with_metadata(self, metadata): + """Return field with new metadata.""" + + def equals(self, other, check_metadata=True): + """Check equality with another field.""" + + def to_string(self, **kwargs): + """String representation of field.""" + +class KeyValueMetadata: + """ + Key-value metadata container. + """ + + def __len__(self): ... + def __getitem__(self, key): ... + def __iter__(self): ... + + def get(self, key, default=None): + """Get value by key.""" + + def keys(self): + """Get all keys.""" + + def values(self): + """Get all values.""" + + def items(self): + """Get key-value pairs.""" + + def to_dict(self): + """Convert to Python dictionary.""" +``` + +### Scalars + +Single typed values that provide consistent interface for working with individual data elements. Scalars maintain type information and null state, enabling type-safe operations on individual values. + +```python { .api } +def scalar(value, type=None): + """ + Create scalar from Python value. + + Parameters: + - value: Python value to wrap + - type: DataType, explicit type for scalar + + Returns: + Scalar: Typed scalar value + """ + +# Scalar constants +NA = ... # Not Available scalar +NULL = ... # Null scalar + +class Scalar: + """ + Base class for typed scalar values. + + Attributes: + - type: DataType of scalar + - is_valid: Whether scalar is non-null + """ + + def __eq__(self, other): ... + def __hash__(self): ... + + def as_py(self): + """Convert to Python value.""" + + def cast(self, target_type, safe=True): + """Cast to different type.""" + + def equals(self, other): + """Check equality with another scalar.""" + +# Specific scalar types are available for all Arrow data types: +# NullScalar, BooleanScalar, Int8Scalar, Int16Scalar, Int32Scalar, Int64Scalar, +# UInt8Scalar, UInt16Scalar, UInt32Scalar, UInt64Scalar, HalfFloatScalar, +# FloatScalar, DoubleScalar, Decimal128Scalar, StringScalar, BinaryScalar, +# Date32Scalar, Date64Scalar, TimestampScalar, Time32Scalar, Time64Scalar, +# DurationScalar, ListScalar, StructScalar, MapScalar, DictionaryScalar, etc. +``` + +### Tensors and Sparse Data + +Multi-dimensional arrays and sparse data structures for advanced numerical computing and machine learning applications. + +```python { .api } +class Tensor: + """ + Multi-dimensional array with Arrow data. + + Attributes: + - type: DataType of tensor elements + - shape: Shape tuple of tensor dimensions + - strides: Strides tuple for memory layout + - is_mutable: Whether tensor data is mutable + """ + + def __getitem__(self, key): ... + + def to_numpy(self): + """Convert to NumPy array.""" + + def equals(self, other): + """Check equality with another tensor.""" + +class SparseCOOTensor: + """Sparse tensor in COOrdinate format.""" + +class SparseCSRMatrix: + """Sparse matrix in Compressed Sparse Row format.""" + +class SparseCSCMatrix: + """Sparse matrix in Compressed Sparse Column format.""" + +class SparseCSFTensor: + """Sparse tensor in Compressed Sparse Fiber format.""" +``` + +## Type Definitions + +### Memory Management + +```python { .api } +class DictionaryMemo: + """ + Memo for dictionary encoding to ensure consistent dictionaries. 
+ """ + + def __init__(self): ... + + def get_dictionary(self, type): + """Get dictionary for type.""" + + def set_dictionary(self, type, dictionary): + """Set dictionary for type.= 2021) + print(f"Filtered rows: {len(filtered)}") + + # Iterator over batches + total_batches = 0 + for batch in dataset.to_batches(batch_size=2): + total_batches += 1 + print(f"Batch {total_batches}: {batch.num_rows} rows") +``` + +### Partitioned Datasets + +```python +import pyarrow as pa +import pyarrow.dataset as ds +import pyarrow.parquet as pq +import tempfile +import os + +# Create larger sample data +data = { + 'year': [2020] * 100 + [2021] * 100 + [2022] * 100, + 'month': ([1] * 50 + [2] * 50) * 3, + 'day': list(range(1, 51)) * 6, + 'sales': [100 + i for i in range(300)], + 'region': (['North', 'South'] * 150) +} +large_table = pa.table(data) + +with tempfile.TemporaryDirectory() as tmpdir: + # Write partitioned dataset + ds.write_dataset( + large_table, + tmpdir, + format='parquet', + partitioning=['year', 'month'], + partitioning_flavor='hive' + ) + + # List created files + for root, dirs, files in os.walk(tmpdir): + for file in files: + rel_path = os.path.relpath(os.path.join(root, file), tmpdir) + print(f"Created: {rel_path}") + + # Read partitioned dataset + partitioned_dataset = ds.dataset(tmpdir, format='parquet') + + # Dataset automatically discovers partitioning + print(f"Partitioning: {partitioned_dataset.partitioning}") + print(f"Schema: {partitioned_dataset.schema}") + + # Filter by partition + year_2021 = partitioned_dataset.to_table( + filter=ds.field('year') == 2021 + ) + print(f"Year 2021 rows: {len(year_2021)}") + + # Multiple partition filters + specific_partition = partitioned_dataset.to_table( + filter=(ds.field('year') == 2021) & (ds.field('month') == 1) + ) + print(f"2021-01 rows: {len(specific_partition)}") + + # Get partition information + fragments = list(partitioned_dataset.get_fragments()) + for i, fragment in enumerate(fragments[:3]): # First 3 fragments + print(f"Fragment {i}: {fragment.partition_expression}") +``` + +### Advanced Filtering and Projection + +```python +import pyarrow as pa +import pyarrow.dataset as ds +import pyarrow.compute as pc +import tempfile + +# Create dataset with complex data +table = pa.table({ + 'id': range(1000), + 'category': ['A', 'B', 'C'] * 334, # Cycling categories + 'value': [i * 1.5 for i in range(1000)], + 'timestamp': pa.array([ + f'2023-{(i % 12) + 1:02d}-{(i % 28) + 1:02d}' + for i in range(1000) + ], type=pa.string()), + 'tags': [['tag1', 'tag2'][:i % 3] for i in range(1000)] +}) + +with tempfile.TemporaryDirectory() as tmpdir: + # Write dataset + ds.write_dataset(table, tmpdir, format='parquet') + dataset = ds.dataset(tmpdir, format='parquet') + + # Complex filters + complex_filter = ( + (ds.field('category').isin(['A', 'B'])) & + (ds.field('value') > 500) & + (ds.field('id') % 10 == 0) + ) + + filtered_data = dataset.to_table(filter=complex_filter) + print(f"Complex filter result: {len(filtered_data)} rows") + + # String operations in filters + timestamp_filter = ds.field('timestamp').match_substring('2023-01') + jan_data = dataset.to_table(filter=timestamp_filter) + print(f"January data: {len(jan_data)} rows") + + # Scanner with custom settings + scanner = dataset.scanner( + columns=['id', 'category', 'value'], + filter=ds.field('value') > 750, + batch_size=100 + ) + + # Process in batches + batch_count = 0 + total_rows = 0 + for batch in scanner.to_batches(): + batch_count += 1 + total_rows += batch.num_rows + if batch_count <= 
3: # Show first 3 batches + print(f"Batch {batch_count}: {batch.num_rows} rows, " + f"value range: {pc.min(batch['value'])} - {pc.max(batch['value'])}") + + print(f"Total batches: {batch_count}, total rows: {total_rows}") +``` + +### Multi-Format Datasets + +```python +import pyarrow as pa +import pyarrow.dataset as ds +import pyarrow.parquet as pq +import pyarrow.csv as csv +import pyarrow.feather as feather +import tempfile +import os + +# Create sample data +base_data = { + 'id': range(100), + 'name': [f'item_{i}' for i in range(100)], + 'price': [10.0 + i * 0.5 for i in range(100)] +} + +table = pa.table(base_data) + +with tempfile.TemporaryDirectory() as tmpdir: + # Write in different formats + pq.write_table(table.slice(0, 30), os.path.join(tmpdir, 'data1.parquet')) + csv.write_csv(table.slice(30, 30), os.path.join(tmpdir, 'data2.csv')) + feather.write_feather(table.slice(60, 40), os.path.join(tmpdir, 'data3.feather')) + + # Create datasets for each format + parquet_ds = ds.dataset( + os.path.join(tmpdir, 'data1.parquet'), + format='parquet' + ) + + csv_ds = ds.dataset( + os.path.join(tmpdir, 'data2.csv'), + format='csv' + ) + + feather_ds = ds.dataset( + os.path.join(tmpdir, 'data3.feather'), + format='ipc' # Feather uses IPC format + ) + + # Union datasets + union_ds = ds.UnionDataset([parquet_ds, csv_ds, feather_ds]) + + # Read unified dataset + unified_table = union_ds.to_table() + print(f"Unified dataset rows: {len(unified_table)}") + print(f"Schema: {unified_table.schema}") + + # Verify data integrity + assert len(unified_table) == 100 + assert unified_table['id'].to_pylist() == list(range(100)) + + # Process by source + for i, fragment in enumerate(union_ds.get_fragments()): + fragment_table = fragment.to_table() + print(f"Fragment {i}: {len(fragment_table)} rows from {type(fragment).__name__}") +``` + +### Dataset Schema Evolution + +```python +import pyarrow as pa +import pyarrow.dataset as ds +import pyarrow.parquet as pq +import tempfile +import os + +with tempfile.TemporaryDirectory() as tmpdir: + # Version 1 schema + v1_table = pa.table({ + 'id': [1, 2, 3], + 'name': ['Alice', 'Bob', 'Charlie'], + 'value': [10.5, 20.3, 30.1] + }) + + # Version 2 schema (added column) + v2_table = pa.table({ + 'id': [4, 5, 6], + 'name': ['Diana', 'Eve', 'Frank'], + 'value': [40.7, 50.2, 60.8], + 'category': ['A', 'B', 'A'] # New column + }) + + # Version 3 schema (changed type, added column) + v3_table = pa.table({ + 'id': [7, 8, 9], + 'name': ['Grace', 'Henry', 'Iris'], + 'value': [70.1, 80.9, 90.5], + 'category': ['B', 'A', 'B'], + 'timestamp': ['2023-01-01', '2023-01-02', '2023-01-03'] # Another new column + }) + + # Write different versions + pq.write_table(v1_table, os.path.join(tmpdir, 'v1.parquet')) + pq.write_table(v2_table, os.path.join(tmpdir, 'v2.parquet')) + pq.write_table(v3_table, os.path.join(tmpdir, 'v3.parquet')) + + # Create dataset with schema evolution + dataset = ds.dataset(tmpdir, format='parquet') + + # Dataset handles schema evolution automatically + print(f"Unified schema: {dataset.schema}") + + # Read all data - missing columns filled with nulls + full_table = dataset.to_table() + print(f"Total rows: {len(full_table)}") + print(f"Columns: {full_table.column_names}") + + # Show schema evolution effects + for col_name in full_table.column_names: + column = full_table[col_name] + null_count = pc.count(column, mode='only_null').as_py() + print(f"Column '{col_name}': {null_count} nulls out of {len(column)}") + + # Handle schema evolution explicitly + # Define target 
schema
+    target_schema = pa.schema([
+        pa.field('id', pa.int64()),
+        pa.field('name', pa.string()),
+        pa.field('value', pa.float64()),
+        pa.field('category', pa.string()),
+        pa.field('timestamp', pa.string()),
+        pa.field('version', pa.int32())  # Add version tracking
+    ])
+
+    # Project to target schema with computed column
+    projected = dataset.to_table(
+        schema=target_schema,
+        # Note: computed columns require more advanced techniques
+        # This example shows the concept
+    )
+```
+
+### Performance Optimization
+
+```python
+import os
+import pyarrow as pa
+import pyarrow.dataset as ds
+import pyarrow.parquet as pq
+import tempfile
+import time
+
+# Create large dataset for performance testing
+n_rows = 100000
+large_table = pa.table({
+    'id': range(n_rows),
+    'category': ['A', 'B', 'C', 'D'] * (n_rows // 4),
+    'value1': [i * 1.1 for i in range(n_rows)],
+    'value2': [i * 2.2 for i in range(n_rows)],
+    'timestamp': pa.array([
+        f'2023-{(i % 12) + 1:02d}-{(i % 28) + 1:02d}'
+        for i in range(n_rows)
+    ])
+})
+
+with tempfile.TemporaryDirectory() as tmpdir:
+    # Write with different configurations
+
+    # Single file
+    start_time = time.time()
+    ds.write_dataset(
+        large_table,
+        os.path.join(tmpdir, 'single_file'),
+        format='parquet',
+        basename_template='data-{i}.parquet'
+    )
+    single_file_write_time = time.time() - start_time
+
+    # Partitioned by category
+    start_time = time.time()
+    ds.write_dataset(
+        large_table,
+        os.path.join(tmpdir, 'partitioned'),
+        format='parquet',
+        partitioning=['category']
+    )
+    partitioned_write_time = time.time() - start_time
+
+    # Multiple files with row limit
+    start_time = time.time()
+    ds.write_dataset(
+        large_table,
+        os.path.join(tmpdir, 'multi_file'),
+        format='parquet',
+        max_rows_per_file=20000,
+        max_rows_per_group=20000
+    )
+    multi_file_write_time = time.time() - start_time
+
+    print("Write times:")
+    print(f"  Single file: {single_file_write_time:.3f}s")
+    print(f"  Partitioned: {partitioned_write_time:.3f}s")
+    print(f"  Multi-file: {multi_file_write_time:.3f}s")
+
+    # Compare read performance
+    datasets = {
+        'single_file': ds.dataset(os.path.join(tmpdir, 'single_file')),
+        'partitioned': ds.dataset(os.path.join(tmpdir, 'partitioned')),
+        'multi_file': ds.dataset(os.path.join(tmpdir, 'multi_file'))
+    }
+
+    # Full table read
+    print("\nFull table read times:")
+    for name, dataset in datasets.items():
+        start_time = time.time()
+        table = dataset.to_table()
+        read_time = time.time() - start_time
+        print(f"  {name}: {read_time:.3f}s ({len(table)} rows)")
+
+    # Filtered read (category = 'A')
+    print("\nFiltered read times (category='A'):")
+    for name, dataset in datasets.items():
+        start_time = time.time()
+        filtered = dataset.to_table(filter=ds.field('category') == 'A')
+        read_time = time.time() - start_time
+        print(f"  {name}: {read_time:.3f}s ({len(filtered)} rows)")
+
+    # Column projection
+    print("\nProjected read times (id, value1 only):")
+    for name, dataset in datasets.items():
+        start_time = time.time()
+        projected = dataset.to_table(columns=['id', 'value1'])
+        read_time = time.time() - start_time
+        print(f"  {name}: {read_time:.3f}s")
+```
\ No newline at end of file
diff --git a/.tessl/tiles/tessl/pypi-pyarrow/docs/file-formats.md b/.tessl/tiles/tessl/pypi-pyarrow/docs/file-formats.md
new file mode 100644
index 0000000..937b30a
--- /dev/null
+++ b/.tessl/tiles/tessl/pypi-pyarrow/docs/file-formats.md
@@ -0,0 +1,733 @@
+# File Format Support
+
+Native support for reading and writing multiple file formats including Parquet, CSV, JSON, Feather, and ORC. 
Provides high-performance I/O with configurable options for compression, encoding, metadata handling, and integration with cloud storage systems. + +## Capabilities + +### Parquet Format + +High-performance columnar storage format with advanced features including compression, encoding, statistics, and schema evolution support. + +```python { .api } +# Main I/O functions +def read_table(source, columns=None, use_threads=True, metadata=None, schema=None, use_pandas_metadata=False, read_dictionary=None, memory_map=False, buffer_size=None, partitioning=None, filesystem=None, filters=None, use_legacy_dataset=None, ignore_prefixes=None, pre_buffer=None, coerce_int96_timestamp_unit=None, thrift_string_size_limit=None, thrift_container_size_limit=None): + """ + Read Parquet file as Arrow Table. + + Parameters: + - source: str or file-like, path or file object + - columns: list of str, columns to read + - use_threads: bool, use multiple threads + - metadata: FileMetaData, pre-loaded metadata + - schema: Schema, expected schema + - use_pandas_metadata: bool, use pandas metadata + - read_dictionary: list, columns to dictionary encode + - memory_map: bool, use memory mapping + - buffer_size: int, read buffer size + - partitioning: Partitioning, dataset partitioning + - filesystem: FileSystem, filesystem to use + - filters: list, row filters + - use_legacy_dataset: bool, use legacy dataset API + - ignore_prefixes: list, prefixes to ignore + - pre_buffer: bool, pre-buffer columns + - coerce_int96_timestamp_unit: str, int96 timestamp unit + - thrift_string_size_limit: int, thrift string size limit + - thrift_container_size_limit: int, thrift container size limit + + Returns: + Table: Arrow table with data from Parquet file + """ + +def write_table(table, where, row_group_size=None, version='2.6', use_dictionary=None, compression='snappy', write_statistics=None, use_deprecated_int96_timestamps=None, coerce_timestamps=None, allow_truncated_timestamps=False, data_page_size=None, data_page_version='1.0', compression_level=None, use_byte_stream_split=None, column_encoding=None, data_encoding=None, use_compliant_nested_type=None, encryption_properties=None, write_batch_size=None, dictionary_pagesize_limit=None, store_schema=None, write_page_index=None, write_page_checksum=None, sorting_columns=None, filesystem=None, metadata_collector=None): + """ + Write Arrow Table to Parquet file. 
+ + Parameters: + - table: Table, Arrow table to write + - where: str or file-like, output path or file + - row_group_size: int, maximum rows per row group + - version: str, Parquet format version + - use_dictionary: bool or list, dictionary encoding + - compression: str or dict, compression codec + - write_statistics: bool or list, write column statistics + - use_deprecated_int96_timestamps: bool, use int96 for timestamps + - coerce_timestamps: str, timestamp coercion unit + - allow_truncated_timestamps: bool, allow timestamp truncation + - data_page_size: int, target data page size + - data_page_version: str, data page version + - compression_level: int, compression level + - use_byte_stream_split: bool or list, byte stream split encoding + - column_encoding: dict, column encoding options + - data_encoding: dict, data encoding options + - use_compliant_nested_type: bool, compliant nested type naming + - encryption_properties: FileEncryptionProperties, encryption settings + - write_batch_size: int, write batch size + - dictionary_pagesize_limit: int, dictionary page size limit + - store_schema: bool, store schema in metadata + - write_page_index: bool, write page index + - write_page_checksum: bool, write page checksums + - sorting_columns: list, column sorting information + - filesystem: FileSystem, filesystem to use + - metadata_collector: list, collect metadata + """ + +def read_pandas(source, columns=None, **kwargs): + """Read Parquet file optimized for pandas DataFrame.""" + +def read_schema(where, memory_map=False, metadata=None, filesystem=None): + """ + Read schema from Parquet file. + + Parameters: + - where: str or file-like, path or file object + - memory_map: bool, use memory mapping + - metadata: FileMetaData, pre-loaded metadata + - filesystem: FileSystem, filesystem to use + + Returns: + Schema: Arrow schema from Parquet file + """ + +def read_metadata(where, memory_map=False, decryption_properties=None, filesystem=None): + """ + Read metadata from Parquet file. + + Parameters: + - where: str or file-like, path or file object + - memory_map: bool, use memory mapping + - decryption_properties: FileDecryptionProperties, decryption settings + - filesystem: FileSystem, filesystem to use + + Returns: + FileMetaData: Parquet file metadata + """ + +class ParquetFile: + """ + Interface for reading Parquet files. + + Attributes: + - metadata: FileMetaData object + - schema: Arrow schema + - schema_arrow: Arrow schema (alias) + - num_row_groups: Number of row groups + """ + + def __init__(self, source, metadata=None, common_metadata=None, read_dictionary=None, memory_map=False, buffer_size=None, pre_buffer=None, coerce_int96_timestamp_unit=None, decryption_properties=None, thrift_string_size_limit=None, thrift_container_size_limit=None): ... + + def read(self, columns=None, use_threads=True, use_pandas_metadata=False): + """Read entire file as Table.""" + + def read_row_group(self, i, columns=None, use_threads=True, use_pandas_metadata=False): + """Read specific row group.""" + + def read_row_groups(self, row_groups, columns=None, use_threads=True, use_pandas_metadata=False): + """Read multiple row groups.""" + + def iter_batches(self, batch_size=1024, row_groups=None, columns=None, use_threads=True, use_pandas_metadata=False): + """Iterate over record batches.""" + + def scan_contents(self, columns=None, batch_size=1024): + """Scan file contents.""" + +class ParquetWriter: + """ + Writer for Parquet files. + """ + + def __init__(self, where, schema, filesystem=None, **kwargs): ... 
+ + def write_batch(self, batch, row_group_size=None): + """Write record batch.""" + + def write_table(self, table, row_group_size=None): + """Write table.""" + + def close(self): + """Close writer and finalize file.""" + +# Metadata classes +class FileMetaData: + """ + Parquet file metadata. + + Attributes: + - created_by: Creator information + - format_version: Parquet format version + - metadata: Key-value metadata + - num_columns: Number of columns + - num_row_groups: Number of row groups + - num_rows: Total number of rows + - schema: Parquet schema + - serialized_size: Serialized metadata size + """ + + def row_group(self, i): + """Get row group metadata.""" + + def to_dict(self): + """Convert to dictionary.""" + +class RowGroupMetaData: + """ + Row group metadata. + + Attributes: + - num_columns: Number of columns in row group + - num_rows: Number of rows in row group + - total_byte_size: Total byte size + """ + + def column(self, i): + """Get column chunk metadata.""" + +class ColumnChunkMetaData: + """ + Column chunk metadata. + + Attributes: + - column_path: Column path in schema + - compression: Compression codec + - data_page_offset: Data page offset + - dictionary_page_offset: Dictionary page offset + - encodings: List of encodings used + - file_offset: File offset + - file_path: File path (for external columns) + - has_dictionary_page: Whether has dictionary page + - index_page_offset: Index page offset + - num_values: Number of values + - physical_type: Physical storage type + - statistics: Column statistics + - total_compressed_size: Compressed size + - total_uncompressed_size: Uncompressed size + """ + + def to_dict(self): + """Convert to dictionary.""" + +class ParquetSchema: + """ + Parquet schema representation. + + Attributes: + - names: Column names + - pandas_metadata: Pandas metadata + """ + + def column(self, i): + """Get column schema.""" + + def to_arrow_schema(self): + """Convert to Arrow schema.""" + +# Encryption support +class FileEncryptionProperties: + """File-level encryption properties.""" + +class FileDecryptionProperties: + """File-level decryption properties.""" +``` + +### CSV Format + +Flexible CSV reading and writing with extensive parsing options, type inference, and error handling capabilities. + +```python { .api } +def read_csv(input_file, read_options=None, parse_options=None, convert_options=None): + """ + Read CSV file as Arrow Table. + + Parameters: + - input_file: str or file-like, CSV file to read + - read_options: ReadOptions, reading configuration + - parse_options: ParseOptions, parsing configuration + - convert_options: ConvertOptions, conversion configuration + + Returns: + Table: Arrow table with CSV data + """ + +def write_csv(data, output_file, write_options=None): + """ + Write Table to CSV file. + + Parameters: + - data: Table or RecordBatch, data to write + - output_file: str or file-like, output CSV file + - write_options: WriteOptions, writing configuration + """ + +def open_csv(input_file, read_options=None, parse_options=None, convert_options=None): + """ + Open CSV file for streaming. + + Parameters: + - input_file: str or file-like, CSV file to open + - read_options: ReadOptions, reading configuration + - parse_options: ParseOptions, parsing configuration + - convert_options: ConvertOptions, conversion configuration + + Returns: + CSVStreamingReader: Streaming CSV reader + """ + +class ReadOptions: + """ + CSV reading options. 
+ + Attributes: + - use_threads: Whether to use multiple threads + - block_size: Block size for reading + - skip_rows: Number of rows to skip at start + - skip_rows_after_names: Rows to skip after header + - column_names: Explicit column names + - autogenerate_column_names: Auto-generate column names + - encoding: Character encoding (default: utf8) + """ + +class ParseOptions: + """ + CSV parsing options. + + Attributes: + - delimiter: Field delimiter character + - quote_char: Quote character + - double_quote: Whether quotes are doubled for escaping + - escape_char: Escape character + - newlines_in_values: Allow newlines in values + - ignore_empty_lines: Skip empty lines + """ + +class ConvertOptions: + """ + CSV type conversion options. + + Attributes: + - check_utf8: Validate UTF-8 encoding + - column_types: Explicit column types (dict) + - null_values: Values to treat as null + - true_values: Values to treat as True + - false_values: Values to treat as False + - decimal_point: Decimal point character + - strings_can_be_null: Whether strings can be null + - quoted_strings_can_be_null: Whether quoted strings can be null + - auto_dict_encode: Auto dictionary-encode string columns + - auto_dict_max_cardinality: Max cardinality for auto dict encoding + - include_columns: Columns to include + - include_missing_columns: Include missing columns as null + - timestamp_parsers: Custom timestamp parsers + """ + +class WriteOptions: + """ + CSV writing options. + + Attributes: + - include_header: Include column names as header + - batch_size: Batch size for writing + - delimiter: Field delimiter + - quoting_style: When to quote fields + """ + +class CSVStreamingReader: + """ + Streaming CSV reader for large files. + """ + + def __iter__(self): ... + + def read_next_batch(self): + """Read next batch of records.""" + + def schema(self): + """Get schema of CSV data.""" + +class CSVWriter: + """CSV writer with configurable options.""" + + def __init__(self, sink, schema, write_options=None): ... + + def write_batch(self, batch): + """Write record batch.""" + + def write_table(self, table): + """Write table.""" + + def close(self): + """Close writer.""" + +class InvalidRow: + """Information about invalid rows during parsing.""" + +ISO8601 = ... # ISO8601 timestamp parsing constant +``` + +### JSON Format + +Line-delimited JSON reading with schema inference and flexible parsing options for semi-structured data. + +```python { .api } +def read_json(input_file, read_options=None, parse_options=None): + """ + Read line-delimited JSON file as Arrow Table. + + Parameters: + - input_file: str or file-like, JSON file to read + - read_options: ReadOptions, reading configuration + - parse_options: ParseOptions, parsing configuration + + Returns: + Table: Arrow table with JSON data + """ + +def open_json(input_file, read_options=None, parse_options=None): + """ + Open JSON file for streaming. + + Parameters: + - input_file: str or file-like, JSON file to open + - read_options: ReadOptions, reading configuration + - parse_options: ParseOptions, parsing configuration + + Returns: + Iterator: Streaming JSON reader + """ + +class ReadOptions: + """ + JSON reading options. + + Attributes: + - use_threads: Whether to use multiple threads + - block_size: Block size for reading + - schema: Explicit schema + """ + +class ParseOptions: + """ + JSON parsing options. 
+ + Attributes: + - newlines_in_values: Allow newlines in string values + - explicit_schema: Use explicit schema + - unexpected_field_behavior: How to handle unexpected fields + """ +``` + +### Feather Format + +Fast, language-agnostic columnar serialization format optimized for data interchange and temporary storage. + +```python { .api } +def read_table(source, columns=None, use_threads=True, memory_map=False): + """ + Read Feather file as Arrow Table. + + Parameters: + - source: str or file-like, Feather file to read + - columns: list of str, columns to read + - use_threads: bool, use multiple threads + - memory_map: bool, use memory mapping + + Returns: + Table: Arrow table with Feather data + """ + +def read_feather(source, columns=None, use_threads=True, memory_map=False): + """Read Feather file (pandas compatibility).""" + +def write_feather(df, dest, compression=None, compression_level=None, chunksize=None, version=None): + """ + Write Table to Feather file. + + Parameters: + - df: Table or pandas DataFrame, data to write + - dest: str or file-like, output Feather file + - compression: str, compression codec + - compression_level: int, compression level + - chunksize: int, maximum rows per chunk + - version: int, Feather format version + """ + +class FeatherDataset: + """Multi-file Feather dataset interface.""" + +class FeatherError(Exception): + """Feather format-specific errors.""" +``` + +### ORC Format + +Optimized Row Columnar format with advanced compression and indexing for big data processing. + +```python { .api } +def read_table(source, columns=None, use_threads=True, memory_map=False): + """ + Read ORC file as Arrow Table. + + Parameters: + - source: str or file-like, ORC file to read + - columns: list of str, columns to read + - use_threads: bool, use multiple threads + - memory_map: bool, use memory mapping + + Returns: + Table: Arrow table with ORC data + """ + +def write_table(table, where, file_version='0.12', batch_size=1024, stripe_size=67108864, compression='ZLIB', compression_block_size=65536, compression_strategy='speed', row_index_stride=10000, padding_tolerance=0.0, dictionary_key_size_threshold=0.0, bloom_filter_columns=None, bloom_filter_fpp=0.05): + """ + Write Arrow Table to ORC file. + + Parameters: + - table: Table, Arrow table to write + - where: str or file-like, output ORC file + - file_version: str, ORC file format version + - batch_size: int, batch size for writing + - stripe_size: int, target stripe size in bytes + - compression: str, compression codec + - compression_block_size: int, compression block size + - compression_strategy: str, compression strategy + - row_index_stride: int, row index stride + - padding_tolerance: float, padding tolerance + - dictionary_key_size_threshold: float, dictionary encoding threshold + - bloom_filter_columns: list, columns for bloom filters + - bloom_filter_fpp: float, bloom filter false positive probability + """ + +class ORCFile: + """ + ORC file reader interface. + + Attributes: + - metadata: ORC file metadata + - schema: Arrow schema + - nrows: Number of rows + - nstripes: Number of stripes + """ + + def __init__(self, source, memory_map=False): ... 
+ + def read(self, columns=None, use_threads=True): + """Read entire file as Table.""" + + def read_stripe(self, n, columns=None): + """Read specific stripe.""" +``` + +## Usage Examples + +### Working with Parquet Files + +```python +import pyarrow as pa +import pyarrow.parquet as pq + +# Write Parquet file +table = pa.table({ + 'id': [1, 2, 3, 4, 5], + 'name': ['Alice', 'Bob', 'Charlie', 'Diana', 'Eve'], + 'value': [10.5, 20.3, 30.1, 40.7, 50.2] +}) + +# Basic write +pq.write_table(table, 'example.parquet') + +# Advanced write with options +pq.write_table( + table, + 'advanced.parquet', + compression='snappy', + use_dictionary=['name'], + row_group_size=2, + write_statistics=True +) + +# Read Parquet file +loaded_table = pq.read_table('example.parquet') + +# Read specific columns +subset = pq.read_table('example.parquet', columns=['id', 'name']) + +# Read with filtering +filtered = pq.read_table( + 'example.parquet', + filters=[('value', '>', 25.0)] +) + +# Working with ParquetFile class +parquet_file = pq.ParquetFile('example.parquet') +print(f"Schema: {parquet_file.schema}") +print(f"Metadata: {parquet_file.metadata}") +print(f"Row groups: {parquet_file.num_row_groups}") + +# Read row group +row_group_0 = parquet_file.read_row_group(0) + +# Iterate over batches +for batch in parquet_file.iter_batches(batch_size=2): + print(batch) +``` + +### CSV File Operations + +```python +import pyarrow as pa +import pyarrow.csv as csv + +# Basic CSV reading +table = csv.read_csv('data.csv') + +# Advanced CSV reading with options +read_options = csv.ReadOptions( + skip_rows=1, + column_names=['id', 'name', 'age', 'salary'] +) +parse_options = csv.ParseOptions( + delimiter=',', + quote_char='"', + escape_char='\\' +) +convert_options = csv.ConvertOptions( + column_types={ + 'id': pa.int64(), + 'name': pa.string(), + 'age': pa.int32(), + 'salary': pa.float64() + }, + null_values=['', 'NULL', 'null'], + strings_can_be_null=True +) + +table = csv.read_csv( + 'data.csv', + read_options=read_options, + parse_options=parse_options, + convert_options=convert_options +) + +# Streaming CSV reading +reader = csv.open_csv('large_data.csv') +for batch in reader: + # Process batch + print(f"Batch shape: {batch.num_rows} x {batch.num_columns}") + +# Write CSV +csv.write_csv(table, 'output.csv') + +# Write with options +write_options = csv.WriteOptions( + include_header=True, + delimiter=';', + quoting_style='needed' +) +csv.write_csv(table, 'output_custom.csv', write_options=write_options) +``` + +### Multi-Format Workflow + +```python +import pyarrow as pa +import pyarrow.parquet as pq +import pyarrow.csv as csv +import pyarrow.feather as feather +import pyarrow.orc as orc + +# Create sample data +table = pa.table({ + 'date': pa.array(['2023-01-01', '2023-01-02', '2023-01-03']), + 'value': [100.5, 200.3, 150.7], + 'category': ['A', 'B', 'A'] +}) + +# Write to different formats +pq.write_table(table, 'data.parquet') +csv.write_csv(table, 'data.csv') +feather.write_feather(table, 'data.feather') +orc.write_table(table, 'data.orc') + +# Read from different formats +parquet_table = pq.read_table('data.parquet') +csv_table = csv.read_csv('data.csv') +feather_table = feather.read_table('data.feather') +orc_table = orc.read_table('data.orc') + +# Verify all tables are equal +assert parquet_table.equals(csv_table) +assert csv_table.equals(feather_table) +assert feather_table.equals(orc_table) + +# Performance comparison +import time + +def time_format(read_func, write_func, filename): + # Write timing + start = 
time.time() + write_func(table, filename) + write_time = time.time() - start + + # Read timing + start = time.time() + result = read_func(filename) + read_time = time.time() - start + + return write_time, read_time + +# Compare formats +formats = [ + ('Parquet', pq.read_table, pq.write_table, 'test.parquet'), + ('Feather', feather.read_table, feather.write_feather, 'test.feather'), + ('ORC', orc.read_table, orc.write_table, 'test.orc') +] + +for name, read_func, write_func, filename in formats: + write_time, read_time = time_format(read_func, write_func, filename) + print(f"{name}: Write {write_time:.4f}s, Read {read_time:.4f}s") +``` + +### Advanced Parquet Features + +```python +import pyarrow as pa +import pyarrow.parquet as pq + +# Schema evolution example +old_schema = pa.schema([ + pa.field('id', pa.int64()), + pa.field('name', pa.string()), + pa.field('value', pa.float64()) +]) + +new_schema = pa.schema([ + pa.field('id', pa.int64()), + pa.field('name', pa.string()), + pa.field('value', pa.float64()), + pa.field('category', pa.string()) # New column +]) + +# Write with old schema +old_table = pa.table([ + [1, 2, 3], + ['A', 'B', 'C'], + [10.5, 20.3, 30.1] +], schema=old_schema) + +pq.write_table(old_table, 'old_format.parquet') + +# Read and extend with new schema +loaded = pq.read_table('old_format.parquet') +extended = loaded.add_column('category', pa.array([None, None, None])) + +# Write with new schema +pq.write_table(extended, 'new_format.parquet') + +# Metadata handling +metadata = {'version': '1.0', 'created_by': 'pyarrow_example'} +table_with_metadata = table.replace_schema_metadata(metadata) +pq.write_table(table_with_metadata, 'with_metadata.parquet') + +# Read metadata +file_metadata = pq.read_metadata('with_metadata.parquet') +print(f"File metadata: {file_metadata.metadata}") +print(f"Schema metadata: {file_metadata.schema.to_arrow_schema().metadata}") +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-pyarrow/docs/index.md b/.tessl/tiles/tessl/pypi-pyarrow/docs/index.md new file mode 100644 index 0000000..e29c708 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-pyarrow/docs/index.md @@ -0,0 +1,217 @@ +# PyArrow + +PyArrow is the Python implementation of Apache Arrow, providing a high-performance interface to the Arrow columnar memory format and computing libraries. It enables efficient data interchange, in-memory analytics, and seamless integration with the Python data science ecosystem including pandas, NumPy, and big data processing systems. 
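+
+A minimal interchange sketch (assuming NumPy and pandas are installed alongside PyArrow) illustrating the pandas/NumPy integration mentioned above; the column names are illustrative only:
+
+```python
+import numpy as np
+import pandas as pd
+import pyarrow as pa
+
+# NumPy array -> Arrow array (typically zero-copy for compatible numeric dtypes)
+np_values = np.arange(5, dtype=np.int64)
+arrow_values = pa.array(np_values)
+
+# pandas DataFrame -> Arrow Table and back
+df = pd.DataFrame({"id": [1, 2, 3], "score": [0.1, 0.5, 0.9]})
+table = pa.Table.from_pandas(df)
+round_tripped = table.to_pandas()
+print(round_tripped.equals(df))
+```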
+ +## Package Information + +- **Package Name**: pyarrow +- **Language**: Python +- **Installation**: `pip install pyarrow` +- **Documentation**: https://arrow.apache.org/docs/python + +## Core Imports + +```python +import pyarrow as pa +``` + +Common specialized imports: + +```python +import pyarrow.compute as pc +import pyarrow.parquet as pq +import pyarrow.csv as csv +import pyarrow.dataset as ds +import pyarrow.flight as flight +``` + +## Basic Usage + +```python +import pyarrow as pa +import numpy as np + +# Create arrays from Python data +arr = pa.array([1, 2, 3, 4, 5]) +str_arr = pa.array(['hello', 'world', None, 'arrow']) + +# Create tables +table = pa.table({ + 'integers': [1, 2, 3, 4], + 'strings': ['foo', 'bar', 'baz', None], + 'floats': [1.0, 2.5, 3.7, 4.1] +}) + +# Read/write Parquet files +import pyarrow.parquet as pq +pq.write_table(table, 'example.parquet') +loaded_table = pq.read_table('example.parquet') + +# Compute operations +import pyarrow.compute as pc +result = pc.sum(arr) +filtered = pc.filter(table, pc.greater(table['integers'], 2)) +``` + +## Architecture + +PyArrow's design centers around the Arrow columnar memory format: + +- **Columnar Storage**: Data organized by columns for efficient analytical operations +- **Zero-Copy Operations**: Memory-efficient data sharing between processes and languages +- **Type System**: Rich data types including nested structures, decimals, and temporal types +- **Compute Engine**: Vectorized operations for high-performance analytics +- **Format Support**: Native support for Parquet, CSV, JSON, ORC, and custom formats +- **Interoperability**: Seamless integration with pandas, NumPy, and other Python libraries + +This architecture enables PyArrow to serve as a foundational component for building scalable data processing applications with fast data movement between systems while maintaining memory efficiency through columnar layouts. + +## Capabilities + +### Core Data Structures + +Fundamental data containers including arrays, tables, schemas, and type definitions. These form the foundation for all PyArrow operations and provide the columnar data structures that enable efficient analytics. + +```python { .api } +def array(obj, type=None, mask=None, size=None, from_pandas=None, safe=True): ... +def table(data, schema=None, metadata=None, columns=None): ... +def schema(fields, metadata=None): ... +def field(name, type, nullable=True, metadata=None): ... + +class Array: ... +class Table: ... +class Schema: ... +class Field: ... +``` + +[Core Data Structures](./core-data-structures.md) + +### Data Types System + +Comprehensive type system supporting primitive types, nested structures, temporal types, and custom extension types. Provides type checking, conversion, and inference capabilities essential for data processing workflows. + +```python { .api } +def int64(): ... +def string(): ... +def timestamp(unit, tz=None): ... +def list_(value_type): ... +def struct(fields): ... + +class DataType: ... +def is_integer(type): ... +def cast(arr, target_type, safe=True): ... +``` + +[Data Types](./data-types.md) + +### Compute Functions + +High-performance vectorized compute operations including mathematical functions, string operations, temporal calculations, aggregations, and filtering. The compute engine provides 200+ functions optimized for columnar data. + +```python { .api } +def add(x, y): ... +def subtract(x, y): ... +def multiply(x, y): ... +def sum(array): ... +def filter(data, mask): ... +def take(data, indices): ... 
+``` + +[Compute Functions](./compute-functions.md) + +### File Format Support + +Native support for reading and writing multiple file formats including Parquet, CSV, JSON, Feather, and ORC. Provides high-performance I/O with configurable options for compression, encoding, and metadata handling. + +```python { .api } +# Parquet +def read_table(source, **kwargs): ... +def write_table(table, where, **kwargs): ... + +# CSV +def read_csv(input_file, **kwargs): ... +def write_csv(data, output_file, **kwargs): ... +``` + +[File Formats](./file-formats.md) + +### Memory and I/O Management + +Memory pool management, buffer operations, compression codecs, and file system abstraction. Provides control over memory allocation and efficient I/O operations across different storage systems. + +```python { .api } +def default_memory_pool(): ... +def compress(data, codec=None): ... +def input_stream(source): ... + +class Buffer: ... +class MemoryPool: ... +``` + +[Memory and I/O](./memory-io.md) + +### Dataset Operations + +Multi-file dataset interface supporting partitioned data, lazy evaluation, and distributed processing. Enables efficient querying of large datasets stored across multiple files with automatic partition discovery. + +```python { .api } +def dataset(source, **kwargs): ... +def write_dataset(data, base_dir, **kwargs): ... + +class Dataset: ... +class Scanner: ... +``` + +[Dataset Operations](./dataset-operations.md) + +### Arrow Flight RPC + +High-performance RPC framework for distributed data services. Provides client-server architecture for streaming large datasets with authentication, metadata handling, and custom middleware support. + +```python { .api } +def connect(location, **kwargs): ... + +class FlightClient: ... +class FlightServerBase: ... +class FlightDescriptor: ... +``` + +[Arrow Flight](./arrow-flight.md) + +### Advanced Features + +Specialized functionality including CUDA GPU support, Substrait query integration, execution engine operations, and data interchange protocols for advanced use cases and system integration. + +```python { .api } +# CUDA support +class Context: ... +class CudaBuffer: ... + +# Substrait integration +def run_query(plan): ... +def serialize_expressions(expressions): ... +``` + +[Advanced Features](./advanced-features.md) + +## Version and Build Information + +```python { .api } +def show_versions(): ... +def show_info(): ... +def cpp_build_info(): ... +def runtime_info(): ... +``` + +Access to version information, build configuration, and runtime environment details for troubleshooting and compatibility checking. + +## Exception Handling + +```python { .api } +class ArrowException(Exception): ... +class ArrowInvalid(ArrowException): ... +class ArrowTypeError(ArrowException): ... +class ArrowIOError(ArrowException): ... +``` + +Comprehensive exception hierarchy for error handling in data processing workflows. \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-pyarrow/docs/memory-io.md b/.tessl/tiles/tessl/pypi-pyarrow/docs/memory-io.md new file mode 100644 index 0000000..b30ea1a --- /dev/null +++ b/.tessl/tiles/tessl/pypi-pyarrow/docs/memory-io.md @@ -0,0 +1,743 @@ +# Memory and I/O Management + +Memory pool management, buffer operations, compression codecs, and file system abstraction. Provides fine-grained control over memory allocation, efficient I/O operations, and support for various storage systems including local files, cloud storage, and in-memory buffers. 
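+
+A quick sketch of the three main facilities described below (memory pools, buffers, and compression), assuming a standard PyArrow build where the gzip codec is available:
+
+```python
+import pyarrow as pa
+
+# Inspect the default memory pool
+pool = pa.default_memory_pool()
+print(pool.backend_name, pool.bytes_allocated())
+
+# Wrap Python bytes in an Arrow buffer (zero-copy view)
+buf = pa.py_buffer(b"hello arrow")
+print(buf.size, buf.to_pybytes())
+
+# Compress and decompress the buffer contents
+compressed = pa.compress(buf, codec="gzip")
+restored = pa.decompress(compressed, decompressed_size=buf.size, codec="gzip")
+assert restored.to_pybytes() == buf.to_pybytes()
+```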
+ +## Capabilities + +### Memory Management + +Control over memory allocation with pluggable memory pools, tracking, and different backend implementations. + +```python { .api } +def default_memory_pool(): + """ + Get default memory pool. + + Returns: + MemoryPool: Default memory pool instance + """ + +def system_memory_pool(): + """ + Get system memory pool. + + Returns: + MemoryPool: System memory pool using malloc/free + """ + +def jemalloc_memory_pool(): + """ + Get jemalloc memory pool (if available). + + Returns: + MemoryPool: jemalloc-based memory pool + """ + +def mimalloc_memory_pool(): + """ + Get mimalloc memory pool (if available). + + Returns: + MemoryPool: mimalloc-based memory pool + """ + +def set_memory_pool(pool): + """ + Set default memory pool. + + Parameters: + - pool: MemoryPool, memory pool to use as default + """ + +def total_allocated_bytes(): + """ + Get total allocated bytes across all pools. + + Returns: + int: Total allocated bytes + """ + +def supported_memory_backends(): + """ + List supported memory pool backends. + + Returns: + list of str: Available memory backends + """ + +def log_memory_allocations(enable): + """ + Control memory allocation logging. + + Parameters: + - enable: bool, whether to enable logging + """ + +def jemalloc_set_decay_ms(decay_ms): + """ + Set jemalloc decay time. + + Parameters: + - decay_ms: int, decay time in milliseconds + """ + +class MemoryPool: + """ + Abstract memory pool interface. + + Attributes: + - backend_name: Name of memory pool backend + - bytes_allocated: Current allocated bytes + - max_memory: Maximum memory allocation + - total_bytes_allocated: Total bytes allocated + """ + + def allocate(self, size): + """Allocate memory buffer.""" + + def reallocate(self, buffer, old_size, new_size): + """Reallocate memory buffer.""" + + def free(self, buffer, size): + """Free memory buffer.""" + +class LoggingMemoryPool(MemoryPool): + """Memory pool wrapper with allocation logging.""" + +class ProxyMemoryPool(MemoryPool): + """Memory pool proxy for delegation.""" + +def logging_memory_pool(pool): + """ + Create logging wrapper for memory pool. + + Parameters: + - pool: MemoryPool, pool to wrap + + Returns: + LoggingMemoryPool: Logging memory pool wrapper + """ + +def proxy_memory_pool(pool): + """ + Create proxy wrapper for memory pool. + + Parameters: + - pool: MemoryPool, pool to proxy + + Returns: + ProxyMemoryPool: Proxy memory pool wrapper + """ +``` + +### Buffer Operations + +Low-level memory buffer operations for efficient data handling and zero-copy operations. + +```python { .api } +def allocate_buffer(size, alignment=None, memory_pool=None, resizable=False): + """ + Allocate new buffer. + + Parameters: + - size: int, buffer size in bytes + - alignment: int, memory alignment + - memory_pool: MemoryPool, memory pool to use + - resizable: bool, whether buffer is resizable + + Returns: + Buffer or ResizableBuffer: Allocated buffer + """ + +def foreign_buffer(address, size, base=None): + """ + Create buffer from foreign memory. + + Parameters: + - address: int, memory address + - size: int, buffer size + - base: object, object holding memory reference + + Returns: + Buffer: Buffer wrapping foreign memory + """ + +def py_buffer(obj): + """ + Create buffer from Python buffer protocol object. + + Parameters: + - obj: object implementing buffer protocol + + Returns: + Buffer: Buffer wrapping Python object + """ + +class Buffer: + """ + Immutable memory buffer. 
+ + Attributes: + - address: Memory address + - is_mutable: Whether buffer is mutable + - size: Buffer size in bytes + """ + + def __len__(self): ... + def __getitem__(self, key): ... + + def equals(self, other): + """Check buffer equality.""" + + def slice(self, offset=0, length=None): + """Create buffer slice.""" + + def to_pybytes(self): + """Convert to Python bytes.""" + + def hex(self): + """Hexadecimal representation.""" + +class ResizableBuffer(Buffer): + """ + Mutable resizable memory buffer. + + Attributes: + - capacity: Buffer capacity + """ + + def resize(self, new_size, shrink_to_fit=True): + """Resize buffer.""" + + def reserve(self, capacity): + """Reserve buffer capacity.""" +``` + +### Compression + +Compression and decompression with support for multiple codecs and configurable compression levels. + +```python { .api } +def compress(data, codec=None, asbytes=False, memory_pool=None): + """ + Compress data. + + Parameters: + - data: bytes-like, data to compress + - codec: str or Codec, compression codec + - asbytes: bool, return bytes instead of Buffer + - memory_pool: MemoryPool, memory pool for allocation + + Returns: + Buffer or bytes: Compressed data + """ + +def decompress(data, decompressed_size=None, codec=None, memory_pool=None): + """ + Decompress data. + + Parameters: + - data: bytes-like, compressed data + - decompressed_size: int, expected decompressed size + - codec: str or Codec, compression codec + - memory_pool: MemoryPool, memory pool for allocation + + Returns: + Buffer: Decompressed data + """ + +class Codec: + """ + Compression codec interface. + + Attributes: + - name: Codec name + - compression_level: Compression level + """ + + @staticmethod + def is_available(codec_name): + """ + Check if codec is available. + + Parameters: + - codec_name: str, codec name + + Returns: + bool: Whether codec is available + """ + + def compress(self, data, memory_pool=None): + """Compress data.""" + + def decompress(self, data, decompressed_size=None, memory_pool=None): + """Decompress data.""" +``` + +### File I/O + +File interfaces and streaming I/O operations with support for various file types and compression. + +```python { .api } +def input_stream(source, compression='detect', buffer_size=None): + """ + Create input stream from various sources. + + Parameters: + - source: str, file path, or file-like object + - compression: str, compression type or 'detect' + - buffer_size: int, buffer size for reading + + Returns: + InputStream: Input stream for reading + """ + +def output_stream(source, compression=None, buffer_size=None): + """ + Create output stream. + + Parameters: + - source: str, file path, or file-like object + - compression: str, compression type + - buffer_size: int, buffer size for writing + + Returns: + OutputStream: Output stream for writing + """ + +def memory_map(path, mode='r'): + """ + Create memory map from file. + + Parameters: + - path: str, file path + - mode: str, access mode ('r', 'r+', 'w') + + Returns: + MemoryMappedFile: Memory mapped file + """ + +def create_memory_map(path, size): + """ + Create new memory mapped file. + + Parameters: + - path: str, file path + - size: int, file size + + Returns: + MemoryMappedFile: New memory mapped file + """ + +class NativeFile: + """ + Abstract base for native file implementations. + + Attributes: + - closed: Whether file is closed + - mode: File access mode + """ + + def close(self): ... + def closed(self): ... + def fileno(self): ... + def flush(self): ... + def isatty(self): ... 
+ def readable(self): ... + def seekable(self): ... + def writable(self): ... + + def read(self, nbytes=None): ... + def read1(self, nbytes=None): ... + def readall(self): ... + def readinto(self, b): ... + def readline(self, size=None): ... + def readlines(self, hint=None): ... + + def seek(self, pos, whence=0): ... + def tell(self): ... + def truncate(self, size=None): ... + + def write(self, data): ... + def writelines(self, lines): ... + +class PythonFile(NativeFile): + """Wrapper for Python file-like objects.""" + +class OSFile(NativeFile): + """ + Operating system file with memory mapping support. + + Attributes: + - size: File size in bytes + """ + +class MemoryMappedFile(NativeFile): + """ + Memory-mapped file interface. + + Attributes: + - size: File size in bytes + """ + + def resize(self, new_size): + """Resize memory mapped file.""" + +class BufferedInputStream: + """Buffered input stream wrapper.""" + + def __init__(self, stream, buffer_size=None, memory_pool=None): ... + +class BufferedOutputStream: + """Buffered output stream wrapper.""" + + def __init__(self, stream, buffer_size=None, memory_pool=None): ... + +class CompressedInputStream: + """Compressed input stream.""" + + def __init__(self, stream, compression, memory_pool=None): ... + +class CompressedOutputStream: + """Compressed output stream.""" + + def __init__(self, stream, compression, memory_pool=None): ... + +class TransformInputStream: + """Transform input stream with custom function.""" + + def __init__(self, stream, transform_func): ... + +def transcoding_input_stream(stream, src_encoding, dest_encoding): + """ + Create character encoding transform stream. + + Parameters: + - stream: input stream + - src_encoding: str, source encoding + - dest_encoding: str, destination encoding + + Returns: + TransformInputStream: Transcoding stream + """ + +class FixedSizeBufferWriter: + """Writer to fixed-size buffer.""" + + def __init__(self, buffer): ... + +class BufferReader: + """Reader from buffer.""" + + def __init__(self, buffer): ... + +class BufferOutputStream: + """Output stream to growable buffer.""" + + def __init__(self, memory_pool=None): ... + + def getvalue(self): + """Get buffer contents.""" + +class MockOutputStream: + """Mock output stream for testing.""" + +class CacheOptions: + """Caching configuration for streams.""" + +def have_libhdfs(): + """ + Check if HDFS support is available. + + Returns: + bool: Whether libhdfs is available + """ +``` + +### Device Support + +Device abstraction for CPU and GPU memory management in heterogeneous computing environments. + +```python { .api } +class Device: + """ + Device abstraction for CPU/GPU memory. + + Attributes: + - device_type: Type of device + - device_id: Device identifier + """ + + def equals(self, other): + """Check device equality.""" + + def __str__(self): ... + +class DeviceAllocationType: + """Enumeration of device allocation types.""" + +class MemoryManager: + """ + Cross-device memory manager interface. + """ + + def allocate(self, size): + """Allocate device memory.""" + + def copy_non_owned(self, data, device=None, memory_pool=None): + """Copy data to device.""" + +def default_cpu_memory_manager(): + """ + Get default CPU memory manager. 
+ + Returns: + MemoryManager: CPU memory manager + """ +``` + +## Usage Examples + +### Memory Pool Management + +```python +import pyarrow as pa + +# Check available memory backends +backends = pa.supported_memory_backends() +print(f"Available backends: {backends}") + +# Get default memory pool info +pool = pa.default_memory_pool() +print(f"Backend: {pool.backend_name}") +print(f"Allocated: {pool.bytes_allocated()} bytes") +print(f"Max memory: {pool.max_memory()} bytes") + +# Use different memory pools +if 'jemalloc' in backends: + jemalloc_pool = pa.jemalloc_memory_pool() + pa.set_memory_pool(jemalloc_pool) + print("Switched to jemalloc") + +# Create logging memory pool +logging_pool = pa.logging_memory_pool(pa.default_memory_pool()) +pa.set_memory_pool(logging_pool) + +# Enable memory allocation logging +pa.log_memory_allocations(True) + +# Create large array to see logging +large_array = pa.array(range(1000000)) +print(f"Created array with {len(large_array)} elements") + +# Check total allocation +total = pa.total_allocated_bytes() +print(f"Total allocated: {total} bytes") +``` + +### Buffer Operations + +```python +import pyarrow as pa + +# Allocate buffer +buffer = pa.allocate_buffer(1024) +print(f"Buffer size: {buffer.size}") +print(f"Buffer address: {buffer.address}") + +# Create resizable buffer +resizable = pa.allocate_buffer(512, resizable=True) +print(f"Initial capacity: {resizable.capacity}") + +# Resize buffer +resizable.resize(1024) +print(f"New size: {resizable.size}") +print(f"New capacity: {resizable.capacity}") + +# Create buffer from bytes +data = b"Hello, Arrow!" +py_buffer = pa.py_buffer(data) +print(f"Buffer from bytes: {py_buffer.to_pybytes()}") + +# Buffer slicing +slice_buffer = py_buffer.slice(7, 5) # "Arrow" +print(f"Sliced buffer: {slice_buffer.to_pybytes()}") + +# Foreign buffer (advanced usage) +import ctypes +c_array = (ctypes.c_byte * 10)(*range(10)) +foreign = pa.foreign_buffer( + ctypes.addressof(c_array), + ctypes.sizeof(c_array), + base=c_array # Keep reference +) +print(f"Foreign buffer: {foreign.to_pybytes()}") +``` + +### Compression + +```python +import pyarrow as pa + +# Check available codecs +codecs = ['gzip', 'snappy', 'lz4', 'zstd', 'brotli'] +available = [codec for codec in codecs if pa.Codec.is_available(codec)] +print(f"Available codecs: {available}") + +# Compress data +data = b"This is some test data to compress. 
" * 100 +original_size = len(data) + +for codec_name in available: + # Compress + compressed = pa.compress(data, codec=codec_name) + compressed_size = compressed.size + + # Decompress + decompressed = pa.decompress(compressed, codec=codec_name) + + # Verify + assert decompressed.to_pybytes() == data + + compression_ratio = original_size / compressed_size + print(f"{codec_name}: {original_size} -> {compressed_size} " + f"(ratio: {compression_ratio:.2f})") + +# Use Codec class directly +codec = pa.Codec('snappy') +compressed = codec.compress(data) +decompressed = codec.decompress(compressed) +print(f"Codec class: {len(decompressed.to_pybytes())} bytes") +``` + +### File I/O Operations + +```python +import pyarrow as pa +import tempfile +import os + +# Create sample data +table = pa.table({ + 'id': range(1000), + 'value': [x * 1.5 for x in range(1000)] +}) + +with tempfile.TemporaryDirectory() as temp_dir: + # Memory mapped file + mmap_path = os.path.join(temp_dir, 'mmap_test.bin') + + # Create memory mapped file + with pa.create_memory_map(mmap_path, 8192) as mmap_file: + # Write data + data = b"Memory mapped data " * 100 + mmap_file.write(data) + mmap_file.flush() + + # Read memory mapped file + with pa.memory_map(mmap_path, 'r') as mmap_file: + read_data = mmap_file.read() + print(f"Memory mapped read: {len(read_data)} bytes") + + # Compressed I/O + compressed_path = os.path.join(temp_dir, 'compressed.gz') + + # Write compressed + with pa.output_stream(compressed_path, compression='gzip') as out: + out.write(b"Compressed data " * 1000) + + # Read compressed + with pa.input_stream(compressed_path, compression='gzip') as inp: + read_compressed = inp.read() + print(f"Compressed read: {len(read_compressed)} bytes") + + # Buffer I/O + buffer_stream = pa.BufferOutputStream() + buffer_stream.write(b"Buffer stream data") + buffer_contents = buffer_stream.getvalue() + print(f"Buffer stream: {buffer_contents.to_pybytes()}") + + # Read from buffer + buffer_reader = pa.BufferReader(buffer_contents) + read_from_buffer = buffer_reader.read() + print(f"Buffer reader: {read_from_buffer}") + +# Transcoding example +text_data = "Hello, 世界! 
🌍".encode('utf-8') +utf8_stream = pa.BufferReader(pa.py_buffer(text_data)) + +# Transcode UTF-8 to Latin-1 (this will fail for non-ASCII) +try: + transcoded_stream = pa.transcoding_input_stream( + utf8_stream, 'utf-8', 'latin-1' + ) + transcoded_data = transcoded_stream.read() + print(f"Transcoded: {transcoded_data}") +except Exception as e: + print(f"Transcoding failed: {e}") +``` + +### Advanced Memory Management + +```python +import pyarrow as pa +import gc + +def memory_usage_demo(): + # Track memory usage + initial_memory = pa.total_allocated_bytes() + + # Create large arrays + arrays = [] + for i in range(10): + arr = pa.array(range(100000)) + arrays.append(arr) + + peak_memory = pa.total_allocated_bytes() + print(f"Peak memory usage: {peak_memory - initial_memory} bytes") + + # Clear arrays + arrays.clear() + gc.collect() # Force garbage collection + + final_memory = pa.total_allocated_bytes() + print(f"Final memory usage: {final_memory - initial_memory} bytes") + +# Custom memory pool example +class TrackingMemoryPool(pa.MemoryPool): + """Example custom memory pool that tracks allocations.""" + + def __init__(self, base_pool): + self.base_pool = base_pool + self.allocation_count = 0 + self.deallocation_count = 0 + + @property + def backend_name(self): + return f"tracking_{self.base_pool.backend_name}" + + def bytes_allocated(self): + return self.base_pool.bytes_allocated() + + def max_memory(self): + return self.base_pool.max_memory() + + def allocate(self, size): + self.allocation_count += 1 + return self.base_pool.allocate(size) + + def free(self, buffer, size): + self.deallocation_count += 1 + return self.base_pool.free(buffer, size) + +# Use custom memory pool +base_pool = pa.default_memory_pool() +tracking_pool = TrackingMemoryPool(base_pool) + +# Note: Custom pools in Python have limitations +# This is more of a conceptual example +print(f"Custom pool backend: {tracking_pool.backend_name}") + +# Run memory usage demo +memory_usage_demo() +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-pyarrow/tile.json b/.tessl/tiles/tessl/pypi-pyarrow/tile.json new file mode 100644 index 0000000..b469b5f --- /dev/null +++ b/.tessl/tiles/tessl/pypi-pyarrow/tile.json @@ -0,0 +1,7 @@ +{ + "name": "tessl/pypi-pyarrow", + "version": "21.0.0", + "docs": "docs/index.md", + "describes": "pkg:pypi/pyarrow@21.0.0", + "summary": "Python library for Apache Arrow columnar memory format and computing libraries" +} \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-pyrsistent/docs/core-collections.md b/.tessl/tiles/tessl/pypi-pyrsistent/docs/core-collections.md new file mode 100644 index 0000000..e81e4d4 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-pyrsistent/docs/core-collections.md @@ -0,0 +1,412 @@ +# Core Persistent Collections + +Immutable alternatives to Python's built-in collections that use structural sharing for memory efficiency. All collections provide O(log32 n) performance for most operations and return new instances rather than modifying in-place. + +## Capabilities + +### PMap - Persistent Dictionary + +Immutable mapping similar to Python's dict with efficient updates and lookups. Supports all standard mapping operations plus additional persistent-specific methods. + +```python { .api } +def pmap(initial: Union[Mapping[KT, VT], Iterable[Tuple[KT, VT]]] = {}, pre_size: int = 0) -> PMap[KT, VT]: + """ + Create a persistent map from a mapping or iterable of key-value pairs. 
+ + Parameters: + - initial: Initial mapping or iterable of (key, value) pairs + - pre_size: Expected size for optimization (optional) + + Returns: + PMap instance + """ + +def m(**kwargs) -> PMap[str, Any]: + """ + Create a persistent map from keyword arguments. + + Returns: + PMap instance with kwargs as key-value pairs + """ + +class PMap: + def get(self, key, default=None): + """Get value for key, return default if key not found.""" + + def set(self, key, value) -> 'PMap': + """Return new PMap with key set to value.""" + + def remove(self, key) -> 'PMap': + """Return new PMap without key. Raises KeyError if key missing.""" + + def discard(self, key) -> 'PMap': + """Return new PMap without key. Silent if key missing.""" + + def update(self, *mappings) -> 'PMap': + """Return new PMap with items from other mappings.""" + + def update_with(self, update_fn, *mappings) -> 'PMap': + """Return new PMap updated with merge function for conflicts.""" + + def evolver(self) -> 'PMapEvolver': + """Return mutable-like interface for efficient batch updates.""" + + def transform(self, *transformations) -> 'PMap': + """Apply path-based transformations to nested structure.""" + + def keys(self) -> 'PSet': + """Return PSet of keys.""" + + def values(self) -> 'PMapValues': + """Return view of values.""" + + def items(self) -> 'PMapItems': + """Return view of key-value pairs.""" + + def copy(self) -> 'PMap': + """Return self (immutable, so copy is identity).""" +``` + +### PVector - Persistent List + +Immutable sequence similar to Python's list with efficient append, random access, and slicing operations. + +```python { .api } +def pvector(iterable: Iterable = ()) -> PVector: + """ + Create a persistent vector from an iterable. + + Parameters: + - iterable: Items to include in vector + + Returns: + PVector instance + """ + +def v(*elements) -> PVector: + """ + Create a persistent vector from arguments. + + Returns: + PVector containing the arguments as elements + """ + +class PVector: + def append(self, element) -> 'PVector': + """Return new PVector with element appended.""" + + def extend(self, iterable) -> 'PVector': + """Return new PVector with elements from iterable appended.""" + + def set(self, index: int, value) -> 'PVector': + """Return new PVector with element at index replaced.""" + + def mset(self, *args) -> 'PVector': + """Multi-set: mset(index1, val1, index2, val2, ...)""" + + def delete(self, index: int, stop: int = None) -> 'PVector': + """Return new PVector with elements removed.""" + + def remove(self, value) -> 'PVector': + """Return new PVector with first occurrence of value removed.""" + + def evolver(self) -> 'PVectorEvolver': + """Return mutable-like interface for efficient batch updates.""" + + def transform(self, *transformations) -> 'PVector': + """Apply path-based transformations to nested structure.""" + + def tolist(self) -> list: + """Convert to Python list.""" + + def index(self, value, start: int = 0, stop: int = None) -> int: + """Return index of first occurrence of value.""" + + def count(self, value) -> int: + """Return number of occurrences of value.""" +``` + +### PSet - Persistent Set + +Immutable set with efficient membership testing, set operations, and updates. + +```python { .api } +def pset(iterable: Iterable = (), pre_size: int = 8) -> PSet: + """ + Create a persistent set from an iterable. 
+ + Parameters: + - iterable: Items to include in set + - pre_size: Expected size for optimization + + Returns: + PSet instance + """ + +def s(*elements) -> PSet: + """ + Create a persistent set from arguments. + + Returns: + PSet containing the arguments as elements + """ + +class PSet: + def add(self, element) -> 'PSet': + """Return new PSet with element added.""" + + def update(self, iterable) -> 'PSet': + """Return new PSet with elements from iterable added.""" + + def remove(self, element) -> 'PSet': + """Return new PSet without element. Raises KeyError if missing.""" + + def discard(self, element) -> 'PSet': + """Return new PSet without element. Silent if missing.""" + + def evolver(self) -> 'PSetEvolver': + """Return mutable-like interface for efficient batch updates.""" + + def union(self, *others) -> 'PSet': + """Return new PSet with elements from all sets.""" + + def intersection(self, *others) -> 'PSet': + """Return new PSet with elements common to all sets.""" + + def difference(self, *others) -> 'PSet': + """Return new PSet with elements not in other sets.""" + + def symmetric_difference(self, other) -> 'PSet': + """Return new PSet with elements in either set but not both.""" + + def issubset(self, other) -> bool: + """Test whether every element is in other.""" + + def issuperset(self, other) -> bool: + """Test whether every element in other is in this set.""" + + def isdisjoint(self, other) -> bool: + """Return True if sets have no elements in common.""" + + def copy(self) -> 'PSet': + """Return self (immutable, so copy is identity).""" +``` + +### PBag - Persistent Multiset + +Immutable multiset (bag) that allows duplicate elements and tracks element counts. + +```python { .api } +def pbag(elements: Iterable) -> PBag: + """ + Create a persistent bag from an iterable. + + Parameters: + - elements: Items to include (duplicates allowed) + + Returns: + PBag instance + """ + +def b(*elements) -> PBag: + """ + Create a persistent bag from arguments. + + Returns: + PBag containing the arguments as elements + """ + +class PBag: + def add(self, element) -> 'PBag': + """Return new PBag with element added (increments count).""" + + def remove(self, element) -> 'PBag': + """Return new PBag with one occurrence of element removed.""" + + def count(self, element) -> int: + """Return number of occurrences of element.""" + + def update(self, iterable) -> 'PBag': + """Return new PBag with elements from iterable added.""" +``` + +### PList - Persistent Linked List + +Immutable singly-linked list optimized for prepending operations and functional programming patterns. + +```python { .api } +def plist(iterable: Iterable = (), reverse: bool = False) -> PList: + """ + Create a persistent linked list from an iterable. + + Parameters: + - iterable: Items to include + - reverse: If True, reverse the order + + Returns: + PList instance + """ + +def l(*elements) -> PList: + """ + Create a persistent linked list from arguments. 
+ + Returns: + PList containing the arguments as elements + """ + +class PList: + def cons(self, element) -> 'PList': + """Return new PList with element prepended.""" + + def mcons(self, iterable) -> 'PList': + """Return new PList with elements from iterable prepended.""" + + @property + def first(self): + """First element of the list.""" + + @property + def rest(self) -> 'PList': + """PList of remaining elements after first.""" + + def reverse(self) -> 'PList': + """Return new PList in reverse order.""" + + def remove(self, element) -> 'PList': + """Return new PList with first occurrence of element removed.""" + + def split(self, index: int) -> tuple: + """Return tuple of (left, right) PLists split at index.""" +``` + +### PDeque - Persistent Double-Ended Queue + +Immutable double-ended queue with efficient operations at both ends and optional maximum length. + +```python { .api } +def pdeque(iterable: Iterable = None, maxlen: int = None) -> PDeque: + """ + Create a persistent deque from an iterable. + + Parameters: + - iterable: Items to include + - maxlen: Maximum length (None for unlimited) + + Returns: + PDeque instance + """ + +def dq(*elements) -> PDeque: + """ + Create a persistent deque from arguments. + + Returns: + PDeque containing the arguments as elements + """ + +class PDeque: + def append(self, element) -> 'PDeque': + """Return new PDeque with element appended to right.""" + + def appendleft(self, element) -> 'PDeque': + """Return new PDeque with element prepended to left.""" + + def extend(self, iterable) -> 'PDeque': + """Return new PDeque with elements from iterable appended.""" + + def extendleft(self, iterable) -> 'PDeque': + """Return new PDeque with elements from iterable prepended.""" + + def pop(self, count: int = 1) -> 'PDeque': + """Return new PDeque with count elements removed from right.""" + + def popleft(self, count: int = 1) -> 'PDeque': + """Return new PDeque with count elements removed from left.""" + + @property + def left(self): + """Leftmost element.""" + + @property + def right(self): + """Rightmost element.""" + + @property + def maxlen(self) -> int: + """Maximum length (None if unlimited).""" + + def rotate(self, steps: int) -> 'PDeque': + """Return new PDeque rotated by steps.""" + + def reverse(self) -> 'PDeque': + """Return new PDeque in reverse order.""" + + def remove(self, element) -> 'PDeque': + """Return new PDeque with first occurrence of element removed.""" +``` + +## Factory Function Shortcuts + +Convenient single-character aliases for creating collections: + +```python { .api } +def m(**kwargs) -> PMap[str, Any]: ... # pmap from keyword arguments +def v(*elements: T) -> PVector[T]: ... # pvector from arguments +def s(*elements: T) -> PSet[T]: ... # pset from arguments +def b(*elements: T) -> PBag[T]: ... # pbag from arguments +def l(*elements: T) -> PList[T]: ... # plist from arguments +def dq(*elements: T) -> PDeque[T]: ... # pdeque from arguments +``` + +## Evolver Interfaces + +All main collections provide `.evolver()` methods returning mutable-like interfaces for efficient batch updates: + +```python { .api } +class PMapEvolver: + def __setitem__(self, key, value) -> None: ... + def __delitem__(self, key) -> None: ... + def set(self, key, value) -> 'PMapEvolver': ... + def remove(self, key) -> 'PMapEvolver': ... + def is_dirty(self) -> bool: ... + def persistent(self) -> PMap: ... + +class PVectorEvolver: + def __setitem__(self, index: int, value) -> None: ... + def __delitem__(self, index: int) -> None: ... 
+ def append(self, value) -> 'PVectorEvolver': ... + def extend(self, iterable) -> 'PVectorEvolver': ... + def set(self, index: int, value) -> 'PVectorEvolver': ... + def delete(self, value) -> 'PVectorEvolver': ... + def is_dirty(self) -> bool: ... + def persistent(self) -> PVector: ... + +class PSetEvolver: + def add(self, element) -> 'PSetEvolver': ... + def remove(self, element) -> 'PSetEvolver': ... + def is_dirty(self) -> bool: ... + def persistent(self) -> PSet: ... +``` + +## Usage Examples + +```python +# Efficient batch updates using evolvers +pm = pmap({'a': 1, 'b': 2}) +evolver = pm.evolver() +evolver['c'] = 3 +evolver['d'] = 4 +del evolver['a'] +new_pm = evolver.persistent() # Get immutable result + +# Chaining operations +pv = pvector([1, 2, 3]) +result = pv.append(4).extend([5, 6]).set(0, 0) # pvector([0, 2, 3, 4, 5, 6]) + +# Set operations +ps1 = pset([1, 2, 3]) +ps2 = pset([3, 4, 5]) +union = ps1.union(ps2) # pset([1, 2, 3, 4, 5]) +intersection = ps1 & ps2 # pset([3]) - using operator +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-pyrsistent/docs/index.md b/.tessl/tiles/tessl/pypi-pyrsistent/docs/index.md new file mode 100644 index 0000000..3d534b9 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-pyrsistent/docs/index.md @@ -0,0 +1,145 @@ +# Pyrsistent + +A comprehensive Python library providing persistent/immutable data structures. Pyrsistent enables functional programming patterns by offering immutable alternatives to Python's built-in collections (dict, list, set) that never modify in-place but return new instances with requested changes, enabling safer concurrent programming and easier reasoning about program state. + +## Package Information + +- **Package Name**: pyrsistent +- **Language**: Python +- **Installation**: `pip install pyrsistent` + +## Core Imports + +```python +import pyrsistent +``` + +Common imports for working with specific data structures: + +```python +from pyrsistent import pmap, pvector, pset, freeze, thaw +``` + +For type checking and records: + +```python +from pyrsistent import PRecord, PClass, field +``` + +For type annotations: + +```python +from typing import Union, Mapping, Iterable, Tuple, TypeVar, Any +KT = TypeVar('KT') # Key type +VT = TypeVar('VT') # Value type +T = TypeVar('T') # Element type +``` + +## Basic Usage + +```python +from pyrsistent import pmap, pvector, pset, freeze, thaw + +# Create persistent collections +pm = pmap({'name': 'John', 'age': 30}) +pv = pvector([1, 2, 3, 4, 5]) +ps = pset([1, 2, 3, 3, 4]) # Duplicate 3 is automatically removed + +# All operations return new instances +pm2 = pm.set('age', 31) # pm is unchanged +pv2 = pv.append(6) # pv is unchanged +ps2 = ps.add(5) # ps is unchanged + +print(pm2) # pmap({'name': 'John', 'age': 31}) +print(pv2) # pvector([1, 2, 3, 4, 5, 6]) +print(ps2) # pset([1, 2, 3, 4, 5]) + +# Convert between mutable and immutable +regular_dict = {'a': 1, 'b': [2, 3], 'c': {4, 5}} +persistent = freeze(regular_dict) # Recursively converts to persistent +mutable = thaw(persistent) # Recursively converts back to mutable +``` + +## Architecture + +Pyrsistent provides two main categories of persistent data structures: + +- **Core Collections**: Immutable versions of Python's built-in collections with structural sharing for memory efficiency +- **Type-Checked Collections**: Enhanced versions with runtime type validation and invariant checking +- **Record Types**: Fixed-schema data structures for structured data modeling + +All collections use structural sharing through Hash 
Array Mapped Tries (HAMT) and similar data structures, enabling O(log32 n) performance for most operations while minimizing memory usage. + +## Capabilities + +### Core Persistent Collections + +Immutable alternatives to Python's built-in collections including persistent map (dict), vector (list), set, bag (multiset), list (linked), and deque (double-ended queue). All operations return new instances with structural sharing for efficiency. + +```python { .api } +def pmap(initial: Union[Mapping[KT, VT], Iterable[Tuple[KT, VT]]] = {}, pre_size: int = 0) -> PMap[KT, VT]: ... +def pvector(iterable: Iterable[T] = ()) -> PVector[T]: ... +def pset(iterable: Iterable[T] = (), pre_size: int = 8) -> PSet[T]: ... +def pbag(elements: Iterable[T]) -> PBag[T]: ... +def plist(iterable: Iterable[T] = (), reverse: bool = False) -> PList[T]: ... +def pdeque(iterable: Iterable[T] = None, maxlen: int = None) -> PDeque[T]: ... +``` + +[Core Collections](./core-collections.md) + +### Type-Checked Collections + +Runtime type validation for persistent collections with optional invariant checking. Provides CheckedPMap, CheckedPVector, and CheckedPSet with customizable type constraints and validation rules. + +```python { .api } +class CheckedPMap(PMap): + __key_type__: type + __value_type__: type + +class CheckedPVector(PVector): + __type__: type + +class CheckedPSet(PSet): + __type__: type + +def optional(*types) -> tuple: ... +``` + +[Type-Checked Collections](./type-checked-collections.md) + +### Records and Classes + +Structured data types with fixed schemas, type checking, and serialization support. PRecord provides a dict-like interface while PClass provides an object-like interface, both with field specifications and validation. + +```python { .api } +class PRecord(PMap): + def set(self, *args, **kwargs) -> 'PRecord': ... + @classmethod + def create(cls, kwargs: dict, ignore_extra: bool = False) -> 'PRecord': ... + def serialize(self, format=None) -> dict: ... + +class PClass: + def set(self, *args, **kwargs) -> 'PClass': ... + @classmethod + def create(cls, kwargs: dict, ignore_extra: bool = False) -> 'PClass': ... + def serialize(self, format=None) -> dict: ... + +def field(type=(), invariant=..., initial=..., mandatory: bool = False, factory=..., serializer=...) -> 'PField': ... +``` + +[Records and Classes](./records-and-classes.md) + +### Utilities and Transformations + +Helper functions for converting between mutable and immutable structures, applying transformations, and accessing nested data. Includes freeze/thaw conversion, transformation functions, and nested access utilities. + +```python { .api } +def freeze(obj, strict: bool = True): ... +def thaw(obj, strict: bool = True): ... +def mutant(fn) -> callable: ... +def get_in(keys: Iterable, coll: Mapping, default=None, no_default: bool = False): ... +def inc(x: int) -> int: ... +def discard(evolver, key) -> None: ... +``` + +[Utilities and Transformations](./utilities.md) \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-pyrsistent/docs/records-and-classes.md b/.tessl/tiles/tessl/pypi-pyrsistent/docs/records-and-classes.md new file mode 100644 index 0000000..7ebdf66 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-pyrsistent/docs/records-and-classes.md @@ -0,0 +1,430 @@ +# Records and Classes + +Structured data types with fixed schemas, type checking, and serialization support. PRecord provides a dict-like interface while PClass provides an object-like interface, both with field specifications and validation. 
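
A minimal sketch of the difference in access style (the class and field names below are illustrative, not part of the library):

```python
from pyrsistent import PRecord, PClass, field

class UserRecord(PRecord):   # dict-like access
    name = field(type=str, mandatory=True)

class UserClass(PClass):     # attribute-like access
    name = field(type=str, mandatory=True)

rec = UserRecord(name='Alice')
obj = UserClass(name='Alice')

print(rec['name'])           # 'Alice' - PRecord behaves like a mapping
print(obj.name)              # 'Alice' - PClass exposes fields as attributes

rec2 = rec.set(name='Bob')   # both return new instances; the originals are unchanged
obj2 = obj.set(name='Bob')
```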
+ +## Capabilities + +### PRecord - Persistent Record + +Dict-like persistent data structure with fixed schema and field validation. Extends PMap with type checking and field constraints. + +```python { .api } +class PRecord(PMap): + """ + Persistent record with fixed fields and optional validation. + + Define fields as class attributes using field() specifications. + """ + + _precord_fields: dict + _precord_initial_values: dict + + def __init__(self, **kwargs): ... + + def set(self, *args, **kwargs) -> 'PRecord': + """ + Set one or more fields, returning new PRecord instance. + + Supports both positional (key, value) and keyword arguments. + Unlike PMap.set(), accepts multiple key-value pairs. + """ + + @classmethod + def create(cls, kwargs: dict, _factory_fields=None, ignore_extra: bool = False) -> 'PRecord': + """ + Create PRecord instance from dictionary with validation. + + Parameters: + - kwargs: Dictionary of field values + - ignore_extra: If True, ignore fields not defined in schema + + Returns: + New PRecord instance + """ + + def serialize(self, format=None) -> dict: + """ + Serialize record using field serializers. + + Parameters: + - format: Optional format parameter passed to field serializers + + Returns: + Dictionary with serialized field values + """ + + def discard(self, key) -> 'PRecord': + """Return new PRecord without specified field.""" + + def remove(self, key) -> 'PRecord': + """Return new PRecord without specified field (raises KeyError if missing).""" +``` + +### PClass - Persistent Class + +Object-like persistent data structure with fixed schema and field validation. Provides attribute-style access to fields. + +```python { .api } +class PClass: + """ + Persistent class with fixed fields and optional validation. + + Define fields as class attributes using field() specifications. + Provides object-style attribute access. + """ + + def __new__(cls, **kwargs): ... + + def set(self, *args, **kwargs) -> 'PClass': + """ + Set one or more fields, returning new PClass instance. + + Supports both positional (name, value) and keyword arguments. + """ + + @classmethod + def create(cls, kwargs: dict, _factory_fields=None, ignore_extra: bool = False) -> 'PClass': + """ + Create PClass instance from dictionary with validation. + + Parameters: + - kwargs: Dictionary of field values + - ignore_extra: If True, ignore fields not defined in schema + + Returns: + New PClass instance + """ + + def serialize(self, format=None) -> dict: + """ + Serialize class using field serializers. + + Parameters: + - format: Optional format parameter passed to field serializers + + Returns: + Dictionary with serialized field values + """ + + def transform(self, *transformations) -> 'PClass': + """Apply path-based transformations to nested structure.""" + + def evolver(self) -> 'PClassEvolver': + """Return mutable-like interface for efficient batch updates.""" + + def remove(self, name: str) -> 'PClass': + """Return new PClass instance without specified field.""" + +class PClassMeta(type): + """Metaclass for PClass that processes field definitions.""" + +class PClassEvolver: + """Mutable-like interface for efficient PClass updates.""" + + def __init__(self, original: PClass, initial_dict: dict): ... + def __getitem__(self, item): ... + def __setitem__(self, key, value): ... + def __delitem__(self, item): ... + def set(self, key, value) -> 'PClassEvolver': ... + def remove(self, item) -> 'PClassEvolver': ... + def persistent(self) -> PClass: ... + def __getattr__(self, item): ... 
+``` + +## Field Specifications + +### General Field Definition + +Define field schemas with type checking, validation, defaults, and serialization. + +```python { .api } +def field( + type=(), + invariant=lambda _: (True, None), + initial=object(), + mandatory: bool = False, + factory=lambda x: x, + serializer=lambda _, value: value +) -> 'PField': + """ + Define a field specification for PRecord or PClass. + + Parameters: + - type: Required type(s) - single type, tuple of types, or empty tuple for any + - invariant: Validation function returning (bool, error_msg) tuple + - initial: Default value (use object() for no default) + - mandatory: If True, field must be provided during creation + - factory: Function to transform input values + - serializer: Function to transform values during serialization + + Returns: + PField specification object + """ +``` + +### Specialized Collection Fields + +Pre-configured field types for persistent collections with type checking. + +```python { .api } +def pset_field( + item_type, + optional: bool = False, + initial=(), + invariant=lambda _: (True, None), + item_invariant=lambda _: (True, None) +) -> 'PField': + """ + Create a field that holds a type-checked PSet. + + Parameters: + - item_type: Required type for set elements + - optional: If True, field can be None + - initial: Default PSet contents + - invariant: Additional validation function for the field + - item_invariant: Additional validation function for individual items + + Returns: + PField for CheckedPSet + """ + +def pvector_field( + item_type, + optional: bool = False, + initial=(), + invariant=lambda _: (True, None), + item_invariant=lambda _: (True, None) +) -> 'PField': + """ + Create a field that holds a type-checked PVector. + + Parameters: + - item_type: Required type for vector elements + - optional: If True, field can be None + - initial: Default PVector contents + - invariant: Additional validation function for the field + - item_invariant: Additional validation function for individual items + + Returns: + PField for CheckedPVector + """ + +def pmap_field( + key_type, + value_type, + optional: bool = False, + initial=None, + invariant=lambda _: (True, None) +) -> 'PField': + """ + Create a field that holds a type-checked PMap. + + Parameters: + - key_type: Required type for map keys + - value_type: Required type for map values + - optional: If True, field can be None + - initial: Default PMap contents (defaults to empty pmap) + - invariant: Additional validation function + + Returns: + PField for CheckedPMap + """ + +class PField: + """Field specification object (internal use).""" +``` + +## Exception Classes + +```python { .api } +class PTypeError(TypeError): + """ + Type error for record/class fields. 
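    Raised when a field receives a value whose type does not match the field's declared type(s).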
+ + Attributes: + - source_class: Class that raised the error + - field: Field name that caused the error + - expected_types: Tuple of expected types + - actual_type: Actual type that was provided + """ + + source_class: type + field: str + expected_types: tuple + actual_type: type +``` + +## Usage Examples + +### Basic PRecord Usage + +```python +from pyrsistent import PRecord, field + +class Person(PRecord): + name = field(type=str, mandatory=True) + age = field(type=int, initial=0) + email = field(type=str, initial='') + +# Create instances +person = Person(name='Alice', age=30, email='alice@example.com') +person2 = Person(name='Bob') # Uses default age=0, email='' + +# Update fields (returns new instance) +older_person = person.set(age=31) +updated_person = person.set(age=31, email='alice@newdomain.com') + +# Access like a dictionary +print(person['name']) # 'Alice' +print(person.get('age', 0)) # 30 + +# Validation happens automatically +try: + Person(name=123) # Invalid type for name +except PTypeError as e: + print(f"Type error in field {e.field}: expected {e.expected_types}, got {e.actual_type}") +``` + +### Basic PClass Usage + +```python +from pyrsistent import PClass, field + +class Point(PClass): + x = field(type=(int, float), initial=0) + y = field(type=(int, float), initial=0) + +# Create instances +point = Point(x=1, y=2) +origin = Point() # Uses defaults x=0, y=0 + +# Update fields (returns new instance) +moved_point = point.set(x=5, y=10) + +# Access like object attributes +print(point.x) # 1 +print(point.y) # 2 + +# Attribute-style access +distance_squared = point.x**2 + point.y**2 +``` + +### Advanced Field Specifications + +```python +from pyrsistent import PRecord, field, pset_field, pvector_field + +class Product(PRecord): + name = field( + type=str, + mandatory=True + ) + price = field( + type=(int, float), + mandatory=True, + invariant=lambda price: (price > 0, "Price must be positive") + ) + tags = pset_field( + item_type=str, + initial=pset() + ) + reviews = pvector_field( + item_type=str, + initial=pvector() + ) + metadata = field( + type=dict, + initial={}, + factory=lambda d: d.copy(), # Ensure we get a copy + serializer=lambda _, value: dict(value) # Convert to regular dict for JSON + ) + +# Create product +product = Product( + name='Laptop', + price=999.99, + tags=pset(['electronics', 'computers']), + reviews=pvector(['Great laptop!', 'Fast delivery']) +) + +# Validation works +try: + Product(name='Invalid', price=-100) # Negative price +except InvariantException as e: + print(f"Invariant failed: {e}") +``` + +### Serialization + +```python +class User(PRecord): + username = field(type=str, mandatory=True) + created_at = field( + type=str, + factory=lambda dt: dt.isoformat() if hasattr(dt, 'isoformat') else str(dt), + serializer=lambda _, value: value # Already converted by factory + ) + preferences = field( + type=dict, + initial={}, + serializer=lambda _, prefs: {k: v for k, v in prefs.items() if v is not None} + ) + +from datetime import datetime +user = User( + username='alice', + created_at=datetime.now(), + preferences={'theme': 'dark', 'notifications': True, 'temp': None} +) + +# Serialize for JSON/API output +user_data = user.serialize() +# {'username': 'alice', 'created_at': '2023-...', 'preferences': {'theme': 'dark', 'notifications': True}} +``` + +### Factory Methods and Error Handling + +```python +# Use create() for construction from external data +external_data = { + 'name': 'Alice', + 'age': '30', # String that needs conversion + 
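    # fields not declared on the record are rejected by create() unless ignore_extra=True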
'unknown_field': 'ignored' +} + +class StrictPerson(PRecord): + name = field(type=str, mandatory=True) + age = field(type=int, factory=int) # Convert strings to int + +# Ignore extra fields +person = StrictPerson.create(external_data, ignore_extra=True) + +# Or handle unknown fields +try: + StrictPerson.create(external_data) +except Exception as e: + print(f"Unknown field error: {e}") +``` + +### Evolvers for Batch Updates + +```python +class Config(PClass): + debug = field(type=bool, initial=False) + port = field(type=int, initial=8080) + host = field(type=str, initial='localhost') + +config = Config() + +# Efficient batch updates +evolver = config.evolver() +evolver.debug = True +evolver.port = 3000 +evolver.host = '0.0.0.0' +new_config = evolver.persistent() + +# Or using set method +evolver2 = config.evolver() +evolver2.set('debug', True).set('port', 3000) +new_config2 = evolver2.persistent() +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-pyrsistent/docs/type-checked-collections.md b/.tessl/tiles/tessl/pypi-pyrsistent/docs/type-checked-collections.md new file mode 100644 index 0000000..0224a1d --- /dev/null +++ b/.tessl/tiles/tessl/pypi-pyrsistent/docs/type-checked-collections.md @@ -0,0 +1,266 @@ +# Type-Checked Collections + +Runtime type validation for persistent collections with optional invariant checking. These collections provide the same functionality as core collections but with automatic type checking and validation. + +## Capabilities + +### CheckedPMap - Type-Checked Persistent Map + +Persistent map with runtime validation of key and value types. Raises CheckedKeyTypeError or CheckedValueTypeError when invalid types are used. + +```python { .api } +class CheckedPMap(PMap): + """ + Type-checked persistent map with key and value type validation. + + Class attributes: + - __key_type__: Required key type or tuple of allowed types + - __value_type__: Required value type or tuple of allowed types + """ + + __key_type__: type + __value_type__: type + + def __new__(cls, source: Mapping = {}, size: int = 0) -> 'CheckedPMap': ... + + @classmethod + def create(cls, source_data: Mapping, _factory_fields=None) -> 'CheckedPMap': + """Create instance with type validation.""" + + def serialize(self, format=None) -> dict: + """Serialize to regular dict.""" +``` + +### CheckedPVector - Type-Checked Persistent Vector + +Persistent vector with runtime validation of element types. + +```python { .api } +class CheckedPVector(PVector): + """ + Type-checked persistent vector with element type validation. + + Class attributes: + - __type__: Required element type or tuple of allowed types + """ + + __type__: type + + def __new__(cls, initial: Iterable = ()) -> 'CheckedPVector': ... + + @classmethod + def create(cls, source_data: Iterable, _factory_fields=None) -> 'CheckedPVector': + """Create instance with type validation.""" + + def serialize(self, format=None) -> list: + """Serialize to regular list.""" +``` + +### CheckedPSet - Type-Checked Persistent Set + +Persistent set with runtime validation of element types. + +```python { .api } +class CheckedPSet(PSet): + """ + Type-checked persistent set with element type validation. + + Class attributes: + - __type__: Required element type or tuple of allowed types + """ + + __type__: type + + def __new__(cls, initial: Iterable = ()) -> 'CheckedPSet': ... 
+ + @classmethod + def create(cls, source_data: Iterable, _factory_fields=None) -> 'CheckedPSet': + """Create instance with type validation.""" + + def serialize(self, format=None) -> set: + """Serialize to regular set.""" +``` + +### CheckedType Base Class + +Abstract base class for all type-checked collections providing common validation infrastructure. + +```python { .api } +class CheckedType: + """Abstract base class for type-checked collections.""" + + @classmethod + def create(cls, source_data, _factory_fields=None): + """Factory method for creating instances with validation.""" + + def serialize(self, format=None): + """Serialize to corresponding Python built-in type.""" +``` + +## Type Specification Functions + +### Optional Types + +Allow a field or collection to accept specified types or None. + +```python { .api } +def optional(*types) -> tuple: + """ + Create a type specification that allows specified types or None. + + Parameters: + - *types: Types to allow (in addition to None) + + Returns: + Tuple of types including None + + Example: + optional(int, str) -> (int, str, type(None)) + """ +``` + +## Exception Classes + +Type checking exceptions raised when validation fails: + +```python { .api } +class InvariantException(Exception): + """ + Raised when invariant validation fails. + + Attributes: + - invariant_errors: Tuple of validation error messages + - missing_fields: Tuple of missing required field names + """ + + invariant_errors: tuple + missing_fields: tuple + +class CheckedTypeError(TypeError): + """ + Base exception for type validation errors in checked collections. + + Attributes: + - source_class: Class that raised the error + - expected_types: Tuple of expected types + - actual_type: Actual type that was provided + - actual_value: The value that caused the error + """ + + source_class: type + expected_types: tuple + actual_type: type + actual_value: object + +class CheckedKeyTypeError(CheckedTypeError): + """Raised when CheckedPMap receives invalid key type.""" + +class CheckedValueTypeError(CheckedTypeError): + """Raised when CheckedPMap/CheckedPVector/CheckedPSet receives invalid value/element type.""" +``` + +## Usage Examples + +### Basic Type Checking + +```python +from pyrsistent import CheckedPMap, CheckedPVector, CheckedPSet, optional + +# Define a type-checked map for string keys and integer values +class StringIntMap(CheckedPMap): + __key_type__ = str + __value_type__ = int + +# Create and use the map +sim = StringIntMap({'a': 1, 'b': 2}) +sim2 = sim.set('c', 3) # OK +# sim.set(123, 4) # Raises CheckedKeyTypeError +# sim.set('d', 'invalid') # Raises CheckedValueTypeError + +# Define a type-checked vector for integers +class IntVector(CheckedPVector): + __type__ = int + +iv = IntVector([1, 2, 3]) +iv2 = iv.append(4) # OK +# iv.append('invalid') # Raises CheckedValueTypeError + +# Define a type-checked set with optional types +class MixedSet(CheckedPSet): + __type__ = optional(int, str) # Allows int, str, or None + +ms = MixedSet([1, 'hello', None, 2]) # OK +# ms.add(3.14) # Raises CheckedValueTypeError +``` + +### Custom Invariants + +```python +# Type-checked collections can include custom validation +class PositiveIntVector(CheckedPVector): + __type__ = int + + def __new__(cls, initial=()): + # Custom validation in constructor + for item in initial: + if not isinstance(item, int) or item <= 0: + raise ValueError("All elements must be positive integers") + return super().__new__(cls, initial) + +piv = PositiveIntVector([1, 2, 3]) # OK +# 
PositiveIntVector([1, -2, 3]) # Raises ValueError +``` + +### Serialization + +```python +# Type-checked collections can be serialized to regular Python types +class PersonMap(CheckedPMap): + __key_type__ = str + __value_type__ = (str, int) + +pm = PersonMap({'name': 'Alice', 'age': 30}) +regular_dict = pm.serialize() # Returns {'name': 'Alice', 'age': 30} +print(type(regular_dict)) # +``` + +### Factory Methods + +```python +# Use create() class method for explicit construction with validation +data = {'valid_key': 42} +validated_map = StringIntMap.create(data) + +# Validation happens during creation +try: + invalid_data = {123: 'invalid'} + StringIntMap.create(invalid_data) +except CheckedKeyTypeError as e: + print(f"Key type error: {e}") +``` + +### Error Handling + +```python +from pyrsistent import CheckedKeyTypeError, CheckedValueTypeError + +class TypedData(CheckedPMap): + __key_type__ = str + __value_type__ = (int, float) + +try: + td = TypedData() + td = td.set(42, 'invalid') # Wrong key type +except CheckedKeyTypeError as e: + print(f"Expected key types: {e.expected_types}") + print(f"Actual key type: {e.actual_type}") + print(f"Actual key value: {e.actual_value}") + +try: + td = TypedData() + td = td.set('key', 'invalid') # Wrong value type +except CheckedValueTypeError as e: + print(f"Expected value types: {e.expected_types}") + print(f"Actual value type: {e.actual_type}") + print(f"Actual value: {e.actual_value}") +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-pyrsistent/docs/utilities.md b/.tessl/tiles/tessl/pypi-pyrsistent/docs/utilities.md new file mode 100644 index 0000000..ce0db56 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-pyrsistent/docs/utilities.md @@ -0,0 +1,378 @@ +# Utilities and Transformations + +Helper functions for converting between mutable and immutable structures, applying transformations, and accessing nested data. These utilities bridge the gap between regular Python data structures and persistent collections. + +## Capabilities + +### Freeze/Thaw Conversions + +Convert between mutable Python built-in types and persistent collections, enabling easy integration with existing codebases. + +```python { .api } +def freeze(obj): + """ + Recursively convert mutable Python structures to persistent equivalents. + + Conversions: + - dict -> pmap + - list -> pvector + - set -> pset + - tuple -> tuple (preserved) + - Other types -> unchanged + + Parameters: + - obj: Object to convert + + Returns: + Persistent version of the input structure + """ + +def thaw(obj): + """ + Recursively convert persistent structures to mutable Python equivalents. + + Conversions: + - pmap -> dict + - pvector -> list + - pset -> set + - tuple -> tuple (preserved) + - Other types -> unchanged + + Parameters: + - obj: Object to convert + + Returns: + Mutable version of the input structure + """ + +def mutant(fn) -> callable: + """ + Decorator that automatically freezes function arguments and return value. + + Useful for integrating persistent data structures with functions that + expect mutable inputs or for ensuring immutability of function results. + + Parameters: + - fn: Function to decorate + + Returns: + Decorated function that freezes args and return value + """ +``` + +### Nested Data Access + +Safely access and manipulate nested data structures using key paths. + +```python { .api } +def get_in(keys: Iterable, coll: Mapping, default=None, no_default: bool = False): + """ + Get value from nested mapping structure using sequence of keys. 
+ + Equivalent to coll[keys[0]][keys[1]]...[keys[n]] with safe fallback. + + Parameters: + - keys: Sequence of keys for nested access + - coll: Nested mapping structure to access + - default: Value to return if path doesn't exist + - no_default: If True, raise KeyError instead of returning default + + Returns: + Value at the nested path, or default if path doesn't exist + + Raises: + KeyError: If path doesn't exist and no_default=True + """ +``` + +### Transformation Functions + +Functions for use with the `.transform()` method of persistent collections to apply path-based modifications. + +```python { .api } +def inc(x: int) -> int: + """ + Increment numeric value by 1. + + Transformation function for use with .transform(). + + Parameters: + - x: Numeric value to increment + + Returns: + x + 1 + """ + +def discard(evolver, key) -> None: + """ + Remove element from evolver during transformation. + + Transformation function that removes a key/element from the + collection being transformed. + + Parameters: + - evolver: Collection evolver (PMapEvolver, PVectorEvolver, PSetEvolver) + - key: Key/index/element to remove + """ + +def rex(expr: str) -> callable: + """ + Create regex matcher for transformation paths. + + Returns a function that tests if a string matches the regex pattern. + Useful for selecting which keys/paths to transform. + + Parameters: + - expr: Regular expression pattern + + Returns: + Function that tests strings against the regex + """ + +def ny(_) -> bool: + """ + Matcher that always returns True. + + Useful as a catch-all matcher in transformations when you want + to match any value. + + Parameters: + - _: Any value (ignored) + + Returns: + True (always) + """ +``` + +### Immutable Factory + +Create namedtuple-like immutable classes with optional field validation. + +```python { .api } +def immutable( + members: Union[str, Iterable[str]] = '', + name: str = 'Immutable', + verbose: bool = False +) -> type: + """ + Create an immutable namedtuple-like class. + + Creates a class similar to collections.namedtuple but with additional + immutability guarantees and optional field validation. + + Parameters: + - members: Field names as string (space/comma separated) or iterable + - name: Name for the created class + - verbose: If True, print the generated class definition + + Returns: + Immutable class type with specified fields + """ +``` + +## Usage Examples + +### Freeze/Thaw Conversions + +```python +from pyrsistent import freeze, thaw, pmap, pvector + +# Convert nested mutable structures to persistent +mutable_data = { + 'users': [ + {'name': 'Alice', 'tags': {'admin', 'active'}}, + {'name': 'Bob', 'tags': {'user', 'active'}} + ], + 'config': { + 'debug': True, + 'features': ['auth', 'logging'] + } +} + +# Recursively convert to persistent structures +persistent_data = freeze(mutable_data) +# Result: pmap({ +# 'users': pvector([ +# pmap({'name': 'Alice', 'tags': pset(['admin', 'active'])}), +# pmap({'name': 'Bob', 'tags': pset(['user', 'active'])}) +# ]), +# 'config': pmap({ +# 'debug': True, +# 'features': pvector(['auth', 'logging']) +# }) +# }) + +# Convert back to mutable for JSON serialization or external APIs +mutable_again = thaw(persistent_data) +import json +json_str = json.dumps(mutable_again) +``` + +### Mutant Decorator + +```python +from pyrsistent import mutant, pmap + +@mutant +def process_user_data(data): + """ + Function that expects and returns mutable data, but we want to + use persistent structures internally for safety. 
+ """ + # data is automatically frozen (converted to persistent) + # Work with persistent data safely + if 'email' not in data: + data = data.set('email', '') + + data = data.set('processed', True) + + # Return value is automatically frozen + return data + +# Use with mutable input - automatically converted +user_dict = {'name': 'Alice', 'age': 30} +result = process_user_data(user_dict) +# result is a pmap, input dict is unchanged +``` + +### Nested Access + +```python +from pyrsistent import get_in, pmap, pvector + +# Complex nested structure +data = pmap({ + 'api': pmap({ + 'v1': pmap({ + 'endpoints': pvector([ + pmap({'path': '/users', 'methods': pvector(['GET', 'POST'])}), + pmap({'path': '/posts', 'methods': pvector(['GET', 'POST', 'DELETE'])}) + ]) + }) + }), + 'config': pmap({ + 'database': pmap({ + 'host': 'localhost', + 'port': 5432 + }) + }) +}) + +# Safe nested access +db_host = get_in(['config', 'database', 'host'], data) # 'localhost' +api_endpoints = get_in(['api', 'v1', 'endpoints'], data) # pvector([...]) +missing = get_in(['config', 'cache', 'ttl'], data, default=300) # 300 + +# Access with index for vectors +first_endpoint = get_in(['api', 'v1', 'endpoints', 0, 'path'], data) # '/users' + +# Raise error if path doesn't exist +try: + get_in(['nonexistent', 'path'], data, no_default=True) +except KeyError: + print("Path not found") +``` + +### Transformations + +```python +from pyrsistent import pmap, pvector, inc, discard, rex, ny + +# Apply transformations to nested structures +data = pmap({ + 'counters': pmap({'page_views': 100, 'api_calls': 50}), + 'users': pvector(['alice', 'bob', 'charlie']), + 'temp_data': 'to_be_removed' +}) + +# Increment all counters +transformed = data.transform( + ['counters', ny], inc # For any key in counters, apply inc function +) +# Result: counters become {'page_views': 101, 'api_calls': 51} + +# Remove elements matching pattern +transformed2 = data.transform( + [rex(r'temp_.*')], discard # Remove any key matching temp_* +) +# Result: 'temp_data' key is removed + +# Complex transformation combining multiple operations +def process_user(user): + return user.upper() if isinstance(user, str) else user + +transformed3 = data.transform( + ['users', ny], process_user, # Transform all users + ['counters', 'page_views'], lambda x: x * 2, # Double page views + ['temp_data'], discard # Remove temp data +) +``` + +### Immutable Classes + +```python +from pyrsistent import immutable + +# Create immutable point class +Point = immutable('x y', name='Point') +p1 = Point(x=1, y=2) +p2 = Point(3, 4) # Positional args also work + +print(p1.x, p1.y) # 1 2 +print(p1) # Point(x=1, y=2) + +# Immutable - cannot modify +try: + p1.x = 5 # Raises AttributeError +except AttributeError: + print("Cannot modify immutable object") + +# Create new instances with _replace +p3 = p1._replace(x=10) # Point(x=10, y=2) + +# With more complex fields +Person = immutable('name age email', name='Person') +person = Person('Alice', 30, 'alice@example.com') + +# Support for tuple unpacking +name, age, email = person +``` + +### Integration Patterns + +```python +from pyrsistent import freeze, thaw, pmap +import json + +# Working with JSON APIs +def load_config(filename): + """Load configuration from JSON file into persistent structure.""" + with open(filename) as f: + mutable_config = json.load(f) + return freeze(mutable_config) + +def save_config(config, filename): + """Save persistent configuration to JSON file.""" + with open(filename, 'w') as f: + json.dump(thaw(config), f, 
indent=2) + +# Thread-safe configuration management +class ConfigManager: + def __init__(self, initial_config): + self._config = freeze(initial_config) + + def get_config(self): + return self._config # Safe to share between threads + + def update_config(self, updates): + # Atomic update - no race conditions + self._config = self._config.update(freeze(updates)) + + def get_setting(self, *path): + return get_in(path, self._config) + +# Usage +config_mgr = ConfigManager({'database': {'host': 'localhost', 'port': 5432}}) +db_host = config_mgr.get_setting('database', 'host') +config_mgr.update_config({'database': {'timeout': 30}}) +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-pyrsistent/tile.json b/.tessl/tiles/tessl/pypi-pyrsistent/tile.json new file mode 100644 index 0000000..9a4107e --- /dev/null +++ b/.tessl/tiles/tessl/pypi-pyrsistent/tile.json @@ -0,0 +1,7 @@ +{ + "name": "tessl/pypi-pyrsistent", + "version": "0.20.0", + "docs": "docs/index.md", + "describes": "pkg:pypi/pyrsistent@0.20.0", + "summary": "Persistent/Functional/Immutable data structures for Python" +} \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-tensorflow/docs/core.md b/.tessl/tiles/tessl/pypi-tensorflow/docs/core.md new file mode 100644 index 0000000..05deb2f --- /dev/null +++ b/.tessl/tiles/tessl/pypi-tensorflow/docs/core.md @@ -0,0 +1,379 @@ +# Core Tensor Operations + +Fundamental tensor creation, manipulation, and mathematical operations that form the foundation of TensorFlow computations. These operations provide the building blocks for constructing complex machine learning models and numerical computations. + +## Capabilities + +### Tensor Creation + +Create tensors from various data sources including constants, variables, and Python data structures. + +```python { .api } +def constant(value, dtype=None, shape=None, name="Const"): + """ + Creates a constant tensor from tensor-like objects. + + Parameters: + - value: A constant value (or list) of output type dtype or a list of values of type dtype + - dtype: The type of the elements of the resulting tensor + - shape: Optional dimensions of resulting tensor + - name: Optional name for the operation + + Returns: + A Constant Tensor + """ + +def Variable(initial_value, trainable=None, validate_shape=True, + caching_device=None, name=None, variable_def=None, dtype=None, + import_scope=None, constraint=None, synchronization=tf.VariableSynchronization.AUTO, + aggregation=tf.VariableAggregation.NONE, shape=None, + experimental_enable_variable_lifting=True): + """ + Creates a new variable with value initial_value. 
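    Unlike a constant, a Variable holds mutable state that can be updated in place, for example with the assign() method.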
+ + Parameters: + - initial_value: A Tensor, or Python object convertible to a Tensor + - trainable: If True, GradientTapes automatically watch uses of this Variable + - validate_shape: If False, allows the variable to be initialized with a value of unknown shape + - caching_device: Optional device string describing where the Variable should be cached + - name: Optional name for the variable + - dtype: If set, initial_value will be converted to the given type + - constraint: An optional projection function to be applied to the variable after being updated + - synchronization: Indicates when a distributed a variable will be aggregated + - aggregation: Indicates how a distributed variable will be aggregated + - shape: The shape of this variable + - experimental_enable_variable_lifting: Whether to enable variable lifting optimization + + Returns: + A Variable + """ + +def zeros(shape, dtype=tf.float32, name=None): + """ + Creates a tensor with all elements set to zero. + + Parameters: + - shape: A list of integers, a tuple of integers, or a 1-D Tensor of type int32 + - dtype: The type of an element in the resulting Tensor + - name: A name for the operation + + Returns: + A Tensor with all elements set to zero + """ + +def ones(shape, dtype=tf.float32, name=None): + """ + Creates a tensor with all elements set to one. + + Parameters: + - shape: A list of integers, a tuple of integers, or a 1-D Tensor of type int32 + - dtype: The type of an element in the resulting Tensor + - name: A name for the operation + + Returns: + A Tensor with all elements set to one + """ + +def fill(dims, value, name=None): + """ + Creates a tensor filled with a scalar value. + + Parameters: + - dims: A Tensor of type int32. 1-D. Represents the shape of the output tensor + - value: A Tensor. 0-D (scalar). Value to fill the returned tensor + - name: A name for the operation + + Returns: + A Tensor + """ + +def reshape(tensor, shape, name=None): + """ + Reshapes a tensor. + + Parameters: + - tensor: A Tensor + - shape: A Tensor of type int32. Defines the shape of the output tensor + - name: A name for the operation + + Returns: + A Tensor. Has the same type as tensor + """ + +def transpose(a, perm=None, conjugate=False, name="transpose"): + """ + Transposes a tensor. + + Parameters: + - a: A Tensor + - perm: A permutation of the dimensions of a + - conjugate: Setting it to True is mathematically equivalent to tf.math.conj(tf.transpose(input)) + - name: A name for the operation + + Returns: + A transposed Tensor + """ + +def eye(num_rows, num_columns=None, batch_shape=None, dtype=tf.float32, name=None): + """ + Construct an identity matrix, or a batch of matrices. + + Parameters: + - num_rows: Non-negative int32 scalar Tensor giving the number of rows in each batch matrix + - num_columns: Optional non-negative int32 scalar Tensor giving the number of columns + - batch_shape: A list or tuple of Python integers or a 1-D int32 Tensor + - dtype: The type of an element in the resulting Tensor + - name: A name for this Op + + Returns: + A Tensor of shape batch_shape + [num_rows, num_columns] + """ +``` + +### Type Conversion and Casting + +Convert tensors between different data types and formats. + +```python { .api } +def convert_to_tensor(value, dtype=None, dtype_hint=None, name=None): + """ + Converts the given value to a Tensor. 
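    Accepts Tensor objects, numpy arrays, Python lists, and Python scalars.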
+ + Parameters: + - value: An object whose type has a registered Tensor conversion function + - dtype: Optional element type for the returned tensor + - dtype_hint: Optional element type for the returned tensor, used when dtype is None + - name: Optional name to use if a new Tensor is created + + Returns: + A Tensor based on value + """ + +def cast(x, dtype, name=None): + """ + Casts a tensor to a new type. + + Parameters: + - x: A Tensor or SparseTensor or IndexedSlices of numeric type + - dtype: The destination type + - name: A name for the operation + + Returns: + A Tensor or SparseTensor or IndexedSlices with same shape as x and requested dtype + """ + +def to_float(x, name="ToFloat"): + """ + Casts a tensor to type float32. + + Parameters: + - x: A Tensor or SparseTensor or IndexedSlices + - name: A name for the operation + + Returns: + A Tensor or SparseTensor or IndexedSlices with same shape as x with type float32 + """ + +def to_double(x, name="ToDouble"): + """ + Casts a tensor to type float64. + + Parameters: + - x: A Tensor or SparseTensor or IndexedSlices + - name: A name for the operation + + Returns: + A Tensor or SparseTensor or IndexedSlices with same shape as x with type float64 + """ + +def to_int32(x, name="ToInt32"): + """ + Casts a tensor to type int32. + + Parameters: + - x: A Tensor or SparseTensor or IndexedSlices + - name: A name for the operation + + Returns: + A Tensor or SparseTensor or IndexedSlices with same shape as x with type int32 + """ + +def to_int64(x, name="ToInt64"): + """ + Casts a tensor to type int64. + + Parameters: + - x: A Tensor or SparseTensor or IndexedSlices + - name: A name for the operation + + Returns: + A Tensor or SparseTensor or IndexedSlices with same shape as x with type int64 + """ +``` + +### Shape Manipulation + +Operations for inspecting and manipulating tensor shapes and dimensions. + +```python { .api } +def shape(input, name=None, out_type=tf.int32): + """ + Returns the shape of a tensor. + + Parameters: + - input: A Tensor or SparseTensor + - name: A name for the operation + - out_type: The desired output type + + Returns: + A Tensor of type out_type + """ + +def size(input, name=None, out_type=tf.int32): + """ + Returns the size of a tensor. + + Parameters: + - input: A Tensor or SparseTensor + - name: A name for the operation + - out_type: The desired output type + + Returns: + A Tensor of type out_type + """ + +def rank(input, name=None): + """ + Returns the rank of a tensor. + + Parameters: + - input: A Tensor or SparseTensor + - name: A name for the operation + + Returns: + A Tensor of type int32 + """ + +def reshape(tensor, shape, name=None): + """ + Reshapes a tensor. + + Parameters: + - tensor: A Tensor + - shape: A Tensor of type int32. Defines the shape of the output tensor + - name: A name for the operation + + Returns: + A Tensor. Has the same type as tensor + """ + +def squeeze(input, axis=None, name=None): + """ + Removes dimensions of size 1 from the shape of a tensor. + + Parameters: + - input: A Tensor. The input to squeeze + - axis: An optional list of ints. If specified, only squeezes the dimensions listed + - name: A name for the operation + + Returns: + A Tensor. Has the same type as input + """ + +def expand_dims(input, axis, name=None): + """ + Inserts a dimension of 1 into a tensor's shape. + + Parameters: + - input: A Tensor + - axis: 0-D (scalar). 
Specifies the dimension index at which to expand + - name: A name for the operation + + Returns: + A Tensor with the same data as input, but its shape has an additional dimension of size 1 + """ +``` + +### Tensor Utilities + +Utility functions for tensor inspection and manipulation. + +```python { .api } +def identity(input, name=None): + """ + Return a tensor with the same shape and contents as input. + + Parameters: + - input: A Tensor + - name: A name for this operation + + Returns: + A Tensor. Has the same type as input + """ + +def stop_gradient(input, name=None): + """ + Stops gradient computation. + + Parameters: + - input: A Tensor or IndexedSlices + - name: A name for the operation + + Returns: + A Tensor or IndexedSlices. Has the same type as input + """ + +def is_tensor(x): + """ + Checks whether x is a tensor or "tensor-like". + + Parameters: + - x: A python object to check + + Returns: + True if x is a tensor or "tensor-like", False if not + """ + +def name_scope(name, default_name=None, values=None): + """ + A context manager for use when defining a Python op. + + Parameters: + - name: The name argument that is passed to the op function + - default_name: The default name to use if the name argument is None + - values: The list of Tensor arguments that are passed to the op function + + Returns: + A context manager that yields the current name scope + """ +``` + +## Usage Examples + +```python +import tensorflow as tf + +# Create tensors +x = tf.constant([1, 2, 3, 4]) +y = tf.Variable([1.0, 2.0, 3.0, 4.0]) +z = tf.zeros((2, 3)) +identity_matrix = tf.eye(4) + +# Type conversion +x_float = tf.cast(x, tf.float32) +x_tensor = tf.convert_to_tensor([1, 2, 3]) + +# Shape manipulation +print(tf.shape(x)) # [4] +print(tf.size(x)) # 4 +print(tf.rank(x)) # 1 + +# Reshape operations +reshaped = tf.reshape(x, (2, 2)) +expanded = tf.expand_dims(x, axis=0) # Shape: (1, 4) +squeezed = tf.squeeze(expanded) # Shape: (4,) + +# Utilities +identity_x = tf.identity(x) +no_grad_x = tf.stop_gradient(x) +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-tensorflow/docs/data.md b/.tessl/tiles/tessl/pypi-tensorflow/docs/data.md new file mode 100644 index 0000000..6214548 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-tensorflow/docs/data.md @@ -0,0 +1,437 @@ +# Data Processing + +Dataset creation, transformation, and preprocessing pipeline operations for efficient data handling and training workflows. The tf.data API provides powerful tools for building scalable input pipelines. + +## Capabilities + +### Dataset Creation + +Create datasets from various data sources. + +```python { .api } +class Dataset: + """A potentially large set of elements.""" + + @staticmethod + def from_tensor_slices(tensors, name=None): + """ + Creates a Dataset whose elements are slices of the given tensors. + + Parameters: + - tensors: A dataset element, whose components have the same first dimension + - name: Optional name for the tf.data operation + + Returns: + A Dataset + """ + + @staticmethod + def from_tensors(tensors, name=None): + """ + Creates a Dataset with a single element, comprising the given tensors. + + Parameters: + - tensors: A dataset element + - name: Optional name for the tf.data operation + + Returns: + A Dataset + """ + + @staticmethod + def from_generator(generator, output_signature, args=None): + """ + Creates a Dataset whose elements are generated by generator. 
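        The generator callable is invoked again each time the dataset is iterated, so it should return a fresh iterator on every call.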
+ + Parameters: + - generator: A callable object that returns an object that supports the iter() protocol + - output_signature: A nested structure of tf.TypeSpec objects corresponding to each component of an element yielded by generator + - args: A tf.Tensor object or a tuple of tf.Tensor objects to pass as arguments to generator + + Returns: + A Dataset + """ + + @staticmethod + def range(*args, **kwargs): + """ + Creates a Dataset of a step-separated range of values. + + Parameters: + - *args: follows the same semantics as python's xrange + - **kwargs: optional keyword arguments + + Returns: + A RangeDataset + """ + + @staticmethod + def zip(datasets): + """ + Creates a Dataset by zipping together the given datasets. + + Parameters: + - datasets: A nested structure of datasets + + Returns: + A Dataset + """ +``` + +### Dataset Transformation + +Transform and manipulate dataset elements. + +```python { .api } +def map(self, map_func, num_parallel_calls=None, deterministic=None, name=None): + """ + Maps map_func across the elements of this dataset. + + Parameters: + - map_func: A function mapping a dataset element to another dataset element + - num_parallel_calls: A tf.int32 scalar tf.Tensor, representing the number elements to process asynchronously in parallel + - deterministic: A boolean controlling whether the map is allowed to return elements out of order + - name: Optional name for the tf.data operation + + Returns: + A Dataset + """ + +def filter(self, predicate, name=None): + """ + Filters this dataset according to predicate. + + Parameters: + - predicate: A function mapping a dataset element to a boolean + - name: Optional name for the tf.data operation + + Returns: + A Dataset + """ + +def flat_map(self, map_func, name=None): + """ + Maps map_func across this dataset and flattens the result. + + Parameters: + - map_func: A function mapping a dataset element to a dataset + - name: Optional name for the tf.data operation + + Returns: + A Dataset + """ + +def interleave(self, map_func, cycle_length=None, block_length=None, + num_parallel_calls=None, deterministic=None, name=None): + """ + Maps map_func across this dataset, and interleaves the results. + + Parameters: + - map_func: A function mapping a dataset element to a dataset + - cycle_length: The number of input elements that will be processed concurrently + - block_length: The number of consecutive elements to produce from each input element before cycling to another input element + - num_parallel_calls: The number of parallel calls for map_func + - deterministic: A boolean controlling whether the interleave is allowed to return elements out of order + - name: Optional name for the tf.data operation + + Returns: + A Dataset + """ +``` + +### Dataset Batching and Sampling + +Operations for batching and sampling data. + +```python { .api } +def batch(self, batch_size, drop_remainder=False, num_parallel_calls=None, + deterministic=None, name=None): + """ + Combines consecutive elements of this dataset into batches. 
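    For example, Dataset.range(8).batch(3) yields batches [0 1 2], [3 4 5] and [6 7]; passing drop_remainder=True drops the final short batch instead.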
+ + Parameters: + - batch_size: A tf.int64 scalar tf.Tensor, representing the number of consecutive elements of this dataset to combine in a single batch + - drop_remainder: A tf.bool scalar tf.Tensor, representing whether the last batch should be dropped in the case it has fewer than batch_size elements + - num_parallel_calls: A tf.int32 scalar tf.Tensor, representing the number elements to process in parallel + - deterministic: A boolean controlling whether the batch is allowed to return elements out of order + - name: Optional name for the tf.data operation + + Returns: + A Dataset + """ + +def padded_batch(self, batch_size, padded_shapes=None, padding_values=None, + drop_remainder=False, name=None): + """ + Combines consecutive elements of this dataset into padded batches. + + Parameters: + - batch_size: A tf.int64 scalar tf.Tensor, representing the number of consecutive elements of this dataset to combine in a single batch + - padded_shapes: A nested structure of tf.TensorShape or tf.int64 vector tensor-like objects representing the shape to which the respective component of each input element should be padded prior to batching + - padding_values: A nested structure of scalar-shaped tf.Tensor, representing the padding values to use for the respective components + - drop_remainder: A tf.bool scalar tf.Tensor, representing whether the last batch should be dropped in the case it has fewer than batch_size elements + - name: Optional name for the tf.data operation + + Returns: + A Dataset + """ + +def unbatch(self, name=None): + """ + Splits elements of a dataset into multiple elements on the batch dimension. + + Parameters: + - name: Optional name for the tf.data operation + + Returns: + A Dataset + """ + +def shuffle(self, buffer_size, seed=None, reshuffle_each_iteration=None, name=None): + """ + Randomly shuffles the elements of this dataset. + + Parameters: + - buffer_size: A tf.int64 scalar tf.Tensor, representing the number of elements from this dataset from which the new dataset will sample + - seed: Optional tf.int64 scalar tf.Tensor, representing the random seed that will be used to create the distribution + - reshuffle_each_iteration: If true, the dataset will be reshuffled each time it is iterated over + - name: Optional name for the tf.data operation + + Returns: + A Dataset + """ + +def repeat(self, count=None, name=None): + """ + Repeats this dataset so each original value is seen count times. + + Parameters: + - count: A tf.int64 scalar tf.Tensor, representing the number of times the dataset should be repeated + - name: Optional name for the tf.data operation + + Returns: + A Dataset + """ + +def take(self, count, name=None): + """ + Creates a Dataset with at most count elements from this dataset. + + Parameters: + - count: A tf.int64 scalar tf.Tensor, representing the number of elements of this dataset that should be taken to form the new dataset + - name: Optional name for the tf.data operation + + Returns: + A Dataset + """ + +def skip(self, count, name=None): + """ + Creates a Dataset that skips count elements from this dataset. + + Parameters: + - count: A tf.int64 scalar tf.Tensor, representing the number of elements of this dataset that should be skipped to form the new dataset + - name: Optional name for the tf.data operation + + Returns: + A Dataset + """ +``` + +### Performance Optimization + +Operations for optimizing dataset performance. + +```python { .api } +def cache(self, filename="", name=None): + """ + Caches the elements in this dataset. 
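    The first iteration over the dataset populates the cache (held in memory when filename is empty, on disk otherwise); subsequent iterations read from the cache instead of re-running the upstream pipeline.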
+ + Parameters: + - filename: A tf.string scalar tf.Tensor, representing the name of a directory on the filesystem to use for caching elements in this Dataset + - name: Optional name for the tf.data operation + + Returns: + A Dataset + """ + +def prefetch(self, buffer_size, name=None): + """ + Creates a Dataset that prefetches elements from this dataset. + + Parameters: + - buffer_size: A tf.int64 scalar tf.Tensor, representing the maximum number of elements that will be buffered when prefetching + - name: Optional name for the tf.data operation + + Returns: + A Dataset + """ + +def parallel_interleave(map_func, cycle_length, block_length=1, + sloppy=False, buffer_output_elements=None, + prefetch_input_elements=None): + """ + A parallel version of the Dataset.interleave() transformation. + + Parameters: + - map_func: A function mapping a nested structure of tensors to a Dataset + - cycle_length: The number of input elements that will be processed concurrently + - block_length: The number of consecutive elements to produce from each input element before cycling to another input element + - sloppy: If false, the relative order of records produced by this transformation is deterministic + - buffer_output_elements: The number of elements each iterator being interleaved should buffer + - prefetch_input_elements: The number of input elements to transform to iterators in parallel and keep buffered + + Returns: + A Dataset transformation function + """ +``` + +### Dataset Properties and Utilities + +Utility methods for inspecting and manipulating datasets. + +```python { .api } +@property +def element_spec(self): + """ + The type specification of an element of this dataset. + + Returns: + A nested structure of tf.TypeSpec objects matching the structure of an element of this dataset + """ + +def cardinality(self): + """ + Returns the cardinality of the dataset, if known. + + Returns: + A scalar tf.int64 Tensor representing the cardinality of the dataset + """ + +def enumerate(self, start=0, name=None): + """ + Enumerates the elements of this dataset. + + Parameters: + - start: A tf.int64 scalar tf.Tensor, representing the start value for enumeration + - name: Optional name for the tf.data operation + + Returns: + A Dataset + """ + +def concatenate(self, dataset): + """ + Creates a Dataset by concatenating the given dataset with this dataset. + + Parameters: + - dataset: Dataset to be concatenated + + Returns: + A Dataset + """ + +def reduce(self, initial_state, reduce_func, name=None): + """ + Reduces the input dataset to a single element. + + Parameters: + - initial_state: An element representing the initial state of the reduction + - reduce_func: A function that maps (old_state, input_element) to new_state + - name: Optional name for the tf.data operation + + Returns: + A dataset element + """ + +def apply(self, transformation_func): + """ + Applies a transformation function to this dataset. 
+ + Parameters: + - transformation_func: A function that takes one Dataset argument and returns a Dataset + + Returns: + The Dataset returned by applying transformation_func to this dataset + """ +``` + +## Usage Examples + +```python +import tensorflow as tf +import numpy as np + +# Create datasets from different sources +# From tensor slices +data = np.array([1, 2, 3, 4, 5]) +dataset = tf.data.Dataset.from_tensor_slices(data) + +# From tensors (single element) +single_element = tf.data.Dataset.from_tensors([1, 2, 3, 4, 5]) + +# From generator +def gen(): + for i in range(100): + yield i + +dataset_gen = tf.data.Dataset.from_generator( + gen, + output_signature=tf.TensorSpec(shape=(), dtype=tf.int32) +) + +# Range dataset +range_dataset = tf.data.Dataset.range(10) + +# Dataset transformations +# Map transformation +squared_dataset = dataset.map(lambda x: x ** 2) + +# Filter transformation +even_dataset = range_dataset.filter(lambda x: x % 2 == 0) + +# Batch transformation +batched_dataset = range_dataset.batch(3) + +# Shuffle and repeat +shuffled_dataset = range_dataset.shuffle(buffer_size=10).repeat(2) + +# Complex pipeline example +(train_images, train_labels) = np.random.random((1000, 28, 28, 1)), np.random.randint(0, 10, 1000) + +train_dataset = tf.data.Dataset.from_tensor_slices((train_images, train_labels)) +train_dataset = (train_dataset + .map(lambda x, y: (tf.cast(x, tf.float32) / 255.0, y)) # Normalize + .shuffle(buffer_size=100) + .batch(32) + .prefetch(tf.data.AUTOTUNE)) + +# Performance optimizations +# Cache dataset +cached_dataset = train_dataset.cache() + +# Prefetch for performance +prefetched_dataset = train_dataset.prefetch(tf.data.AUTOTUNE) + +# Parallel map +parallel_mapped = range_dataset.map( + lambda x: x * 2, + num_parallel_calls=tf.data.AUTOTUNE +) + +# Text processing example +text_data = ["hello world", "tensorflow data", "machine learning"] +text_dataset = tf.data.Dataset.from_tensor_slices(text_data) + +# Split text into words +word_dataset = text_dataset.flat_map( + lambda x: tf.data.Dataset.from_tensor_slices(tf.strings.split(x)) +) + +# Iterate through dataset +for element in range_dataset.take(5): + print(element.numpy()) + +# Convert dataset to list (for small datasets) +dataset_list = list(range_dataset.take(5).as_numpy_iterator()) +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-tensorflow/docs/distribute.md b/.tessl/tiles/tessl/pypi-tensorflow/docs/distribute.md new file mode 100644 index 0000000..3725cac --- /dev/null +++ b/.tessl/tiles/tessl/pypi-tensorflow/docs/distribute.md @@ -0,0 +1,382 @@ +# Distribution Strategies + +Multi-device and multi-worker training strategies for scaling machine learning workloads across GPUs and TPUs. These strategies enable efficient distributed training and deployment. + +## Capabilities + +### Strategy Classes + +Core distribution strategy classes for different distributed training scenarios. + +```python { .api } +class Strategy: + """ + Base class for distribution strategies. + + Methods: + - scope(): Returns a context manager selecting this Strategy as current + - run(fn, args=(), kwargs=None, options=None): Invokes fn on each replica, with the given arguments + - reduce(reduce_op, value, axis): Reduce value across replicas and return result on current device + - gather(value, axis): Gather value across replicas along axis to current device + """ + +class MirroredStrategy(Strategy): + """ + Synchronous training across multiple replicas on one machine. 
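    Gradients from each replica are combined with an all-reduce before the mirrored variables are updated, keeping every copy in sync.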
+
+    This strategy is typically used for training on one machine with multiple GPUs.
+    Variables and updates will be mirrored across all replicas.
+
+    Parameters:
+    - devices: Optional list of device strings or device objects. If not specified, all visible GPUs are used
+    - cross_device_ops: Optional, an instance of tf.distribute.CrossDeviceOps specifying how values are combined across devices
+    """
+
+class MultiWorkerMirroredStrategy(Strategy):
+    """
+    Synchronous training across multiple workers, each with potentially multiple replicas.
+
+    This strategy implements synchronous distributed training across multiple workers,
+    each of which may have multiple GPUs. Similar to MirroredStrategy, it replicates
+    all variables and computations to each local replica.
+
+    Parameters:
+    - cluster_resolver: Optional cluster resolver
+    - communication_options: Optional, communication options for CollectiveOps
+    """
+
+class TPUStrategy(Strategy):
+    """
+    Synchronous training on TPUs and TPU Pods.
+
+    This strategy is for running on TPUs, including TPU pods which can scale
+    to hundreds or thousands of cores.
+
+    Parameters:
+    - tpu_cluster_resolver: A TPUClusterResolver, which provides information about the TPU cluster
+    - experimental_device_assignment: Optional, a DeviceAssignment to run replicas on
+    - experimental_spmd_xla_partitioning: Optional boolean for using SPMD-style sharding
+    """
+
+class OneDeviceStrategy(Strategy):
+    """
+    A distribution strategy for running on a single device.
+
+    Using this strategy will place any variables created in its scope on the specified device.
+    Input distributed through this strategy will be prefetched to the specified device.
+
+    Parameters:
+    - device: Device string identifier for the device on which the variables should be placed
+    """
+
+class CentralStorageStrategy(Strategy):
+    """
+    A one-machine strategy that puts all variables on a single device.
+
+    Variables are assigned to the local CPU and operations are replicated across
+    all local GPUs. If there is only one GPU, operations will run on that GPU.
+
+    Parameters:
+    - compute_devices: Optional list of device strings for placing operations
+    - parameter_device: Optional device string for placing variables
+    """
+
+class ParameterServerStrategy(Strategy):
+    """
+    An asynchronous multi-worker parameter server strategy.
+
+    Parameter server training is a common data-parallel method to scale up a
+    machine learning model on multiple machines.
+
+    Parameters:
+    - cluster_resolver: A ClusterResolver object specifying cluster configuration
+    - variable_partitioner: Optional callable for partitioning variables across parameter servers
+    """
+```
+
+### Strategy Context and Execution
+
+Methods for running code within distribution strategy contexts.
+
+```python { .api }
+def scope(self):
+    """
+    Context manager to make the strategy current and distribute variables created in scope.
+
+    Returns:
+    A context manager
+    """
+
+def run(self, fn, args=(), kwargs=None, options=None):
+    """
+    Invokes fn on each replica, with the given arguments.
+
+    Parameters:
+    - fn: The function to run on each replica
+    - args: Optional positional arguments to fn
+    - kwargs: Optional keyword arguments to fn
+    - options: Optional RunOptions specifying the options to run fn
+
+    Returns:
+    Merged return value of fn across replicas
+    """
+
+def reduce(self, reduce_op, value, axis=None):
+    """
+    Reduce value across replicas and return result on current device. 
+ + Parameters: + - reduce_op: A ReduceOp value specifying how values should be combined + - value: A "per replica" value, e.g. returned by run + - axis: Specifies the dimension to reduce along within each replica's tensor + + Returns: + A Tensor + """ + +def gather(self, value, axis): + """ + Gather value across replicas along axis to current device. + + Parameters: + - value: A "per replica" value, e.g. returned by Strategy.run + - axis: 0-D int32 Tensor. Dimension along which to gather + + Returns: + A Tensor that's the concatenation of value across replicas along axis dimension + """ +``` + +### Distribution Utilities + +Utility functions for working with distributed training. + +```python { .api } +def get_strategy(): + """ + Returns the current tf.distribute.Strategy object. + + Returns: + A Strategy object. Inside a with strategy.scope() block, returns strategy, + otherwise returns the default (single-replica) strategy + """ + +def has_strategy(): + """ + Return if there is a current non-default tf.distribute.Strategy. + + Returns: + True if inside a with strategy.scope() block for a non-default strategy + """ + +def in_cross_replica_context(): + """ + Returns True if in a cross-replica context. + + Returns: + True if in a cross-replica context, False if in a replica context + """ + +def get_replica_context(): + """ + Returns the current tf.distribute.ReplicaContext or None. + + Returns: + The current ReplicaContext object when in a replica context, else None + """ + +def experimental_set_strategy(strategy): + """ + Set a tf.distribute.Strategy as current without with strategy.scope(). + + Parameters: + - strategy: A tf.distribute.Strategy object or None + """ +``` + +### Reduce Operations + +Operations for combining values across replicas. + +```python { .api } +class ReduceOp: + """Indicates how a set of values should be reduced.""" + + SUM = "SUM" # Sum across replicas + MEAN = "MEAN" # Mean across replicas + MIN = "MIN" # Minimum across replicas + MAX = "MAX" # Maximum across replicas + +class CrossDeviceOps: + """Base class for cross-device reduction and broadcasting algorithms.""" + + def reduce(self, reduce_op, per_replica_value, destinations): + """ + Reduce per_replica_value to destinations. + + Parameters: + - reduce_op: Indicates how per_replica_value will be reduced + - per_replica_value: A PerReplica object or a tensor with device placement + - destinations: The return value will be copied to these destinations + + Returns: + A tensor or PerReplica object + """ + + def broadcast(self, tensor, destinations): + """ + Broadcast tensor to destinations. 
+ + Parameters: + - tensor: The tensor to broadcast + - destinations: The broadcast destinations + + Returns: + A tensor or PerReplica object + """ +``` + +## Usage Examples + +```python +import tensorflow as tf +import numpy as np + +# Single GPU strategy +strategy = tf.distribute.OneDeviceStrategy("/gpu:0") + +# Multi-GPU strategy (automatic GPU detection) +strategy = tf.distribute.MirroredStrategy() + +# Explicit device specification +strategy = tf.distribute.MirroredStrategy(devices=["/gpu:0", "/gpu:1"]) + +# Multi-worker strategy (requires cluster setup) +strategy = tf.distribute.MultiWorkerMirroredStrategy() + +# Create and compile model within strategy scope +with strategy.scope(): + model = tf.keras.Sequential([ + tf.keras.layers.Dense(128, activation='relu', input_shape=(10,)), + tf.keras.layers.Dense(64, activation='relu'), + tf.keras.layers.Dense(1, activation='sigmoid') + ]) + + model.compile(optimizer='adam', + loss='binary_crossentropy', + metrics=['accuracy']) + +# Prepare distributed dataset +def make_dataset(): + x = np.random.random((1000, 10)) + y = np.random.randint(2, size=(1000, 1)) + dataset = tf.data.Dataset.from_tensor_slices((x, y)) + return dataset.batch(32) + +# Distribute dataset across replicas +dataset = make_dataset() +dist_dataset = strategy.experimental_distribute_dataset(dataset) + +# Custom training loop with strategy +with strategy.scope(): + # Define loss and metrics + loss_object = tf.keras.losses.BinaryCrossentropy( + from_logits=False, + reduction=tf.keras.losses.Reduction.NONE + ) + + def compute_loss(labels, predictions): + per_example_loss = loss_object(labels, predictions) + return tf.nn.compute_average_loss(per_example_loss, global_batch_size=32) + + train_accuracy = tf.keras.metrics.BinaryAccuracy() + + optimizer = tf.keras.optimizers.Adam() + + # Training step function + def train_step(inputs): + features, labels = inputs + + with tf.GradientTape() as tape: + predictions = model(features, training=True) + loss = compute_loss(labels, predictions) + + gradients = tape.gradient(loss, model.trainable_variables) + optimizer.apply_gradients(zip(gradients, model.trainable_variables)) + + train_accuracy.update_state(labels, predictions) + return loss + + # Distributed training step + @tf.function + def distributed_train_step(dataset_inputs): + per_replica_losses = strategy.run(train_step, args=(dataset_inputs,)) + return strategy.reduce(tf.distribute.ReduceOp.SUM, per_replica_losses, axis=None) + + # Training loop + for epoch in range(5): + total_loss = 0.0 + num_batches = 0 + + for x in dist_dataset: + loss = distributed_train_step(x) + total_loss += loss.numpy() + num_batches += 1 + + train_loss = total_loss / num_batches + print(f"Epoch {epoch + 1}, Loss: {train_loss:.4f}, " + f"Accuracy: {train_accuracy.result():.4f}") + + train_accuracy.reset_states() + +# Using built-in Keras fit with strategy +with strategy.scope(): + model_fit = tf.keras.Sequential([ + tf.keras.layers.Dense(128, activation='relu', input_shape=(10,)), + tf.keras.layers.Dense(1, activation='sigmoid') + ]) + + model_fit.compile(optimizer='adam', + loss='binary_crossentropy', + metrics=['accuracy']) + +# Keras fit automatically handles distribution +model_fit.fit(dataset, epochs=5) + +# Multi-worker setup example (requires environment configuration) +# Set TF_CONFIG environment variable before running: +# os.environ['TF_CONFIG'] = json.dumps({ +# 'cluster': { +# 'worker': ["host1:port", "host2:port", "host3:port"], +# 'ps': ["host4:port", "host5:port"] +# }, +# 'task': {'type': 
'worker', 'index': 1} +# }) + +# Strategy utilities +current_strategy = tf.distribute.get_strategy() +print(f"Current strategy: {type(current_strategy).__name__}") +print(f"Number of replicas: {current_strategy.num_replicas_in_sync}") + +# Check execution context +if tf.distribute.in_cross_replica_context(): + print("In cross-replica context") +else: + print("In replica context") + +# Custom reduction example +with strategy.scope(): + @tf.function + def replica_fn(): + return tf.constant([1.0, 2.0, 3.0]) + + # Run function on all replicas + per_replica_result = strategy.run(replica_fn) + + # Reduce across replicas + reduced_sum = strategy.reduce(tf.distribute.ReduceOp.SUM, per_replica_result) + reduced_mean = strategy.reduce(tf.distribute.ReduceOp.MEAN, per_replica_result) + + print(f"Sum: {reduced_sum}") + print(f"Mean: {reduced_mean}") +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-tensorflow/docs/image.md b/.tessl/tiles/tessl/pypi-tensorflow/docs/image.md new file mode 100644 index 0000000..75b5dff --- /dev/null +++ b/.tessl/tiles/tessl/pypi-tensorflow/docs/image.md @@ -0,0 +1,493 @@ +# Image Processing + +Comprehensive image manipulation, transformation, and computer vision operations for preprocessing and augmentation. These operations provide the tools needed for image-based machine learning workflows. + +## Capabilities + +### Image Decoding and Encoding + +Operations for reading and writing images in various formats. + +```python { .api } +def decode_image(contents, channels=None, dtype=tf.uint8, name=None, expand_animations=True): + """ + Function for decode_bmp, decode_gif, decode_jpeg, and decode_png. + + Parameters: + - contents: 0-D. The encoded image bytes + - channels: An optional int. Defaults to 0. Number of color channels for the decoded image + - dtype: The desired DType of the returned Tensor + - name: A name for the operation + - expand_animations: Controls the shape of the returned op's output + + Returns: + Tensor with type dtype and a 3- or 4-dimensional shape + """ + +def decode_jpeg(contents, channels=0, ratio=1, fancy_upsampling=True, + try_recover_truncated=False, acceptable_fraction=1, + dct_method="", name=None): + """ + Decode a JPEG-encoded image to a uint8 tensor. + + Parameters: + - contents: A Tensor of type string. 0-D. The JPEG-encoded image + - channels: An optional int. Defaults to 0. Number of color channels for the decoded image + - ratio: An optional int. Defaults to 1. Downscaling ratio + - fancy_upsampling: An optional bool. Defaults to True. If true use a slower but nicer upsampling + - try_recover_truncated: An optional bool. Defaults to False. If true try to recover an image from truncated input + - acceptable_fraction: An optional float. Defaults to 1. The minimum required fraction of lines before a truncated input is accepted + - dct_method: An optional string. Defaults to "". string specifying a hint about the algorithm used for decompression + - name: A name for the operation + + Returns: + A Tensor of type uint8 + """ + +def decode_png(contents, channels=0, dtype=tf.uint8, name=None): + """ + Decode a PNG-encoded image to a uint8 or uint16 tensor. + + Parameters: + - contents: A Tensor of type string. 0-D. The PNG-encoded image + - channels: An optional int. Defaults to 0. Number of color channels for the decoded image + - dtype: An optional tf.DType from: tf.uint8, tf.uint16. 
Defaults to tf.uint8 + - name: A name for the operation + + Returns: + A Tensor of type dtype + """ + +def encode_jpeg(image, format="", quality=95, progressive=False, + optimize_size=False, chroma_downsampling=True, + density_unit="in", x_density=300, y_density=300, + xmp_metadata="", name=None): + """ + JPEG-encode an image. + + Parameters: + - image: A Tensor of type uint8. 3-D with shape [height, width, channels] + - format: An optional string from: "", "grayscale", "rgb". Defaults to "" + - quality: An optional int. Defaults to 95. Quality of the compression from 0 to 100 + - progressive: An optional bool. Defaults to False. If True, create a JPEG that loads progressively + - optimize_size: An optional bool. Defaults to False. If True, spend CPU/RAM to reduce size with no quality change + - chroma_downsampling: An optional bool. Defaults to True. See http://en.wikipedia.org/wiki/Chroma_subsampling + - density_unit: An optional string from: "in", "cm". Defaults to "in". Unit used to specify x_density and y_density + - x_density: An optional int. Defaults to 300. Horizontal pixels per density unit + - y_density: An optional int. Defaults to 300. Vertical pixels per density unit + - xmp_metadata: An optional string. Defaults to "". If not empty, embed this XMP metadata in the image header + - name: A name for the operation + + Returns: + A Tensor of type string + """ + +def encode_png(image, compression=-1, name=None): + """ + PNG-encode an image. + + Parameters: + - image: A Tensor. Must be one of the following types: uint8, uint16. 3-D with shape [height, width, channels] + - compression: An optional int. Defaults to -1. Compression level + - name: A name for the operation + + Returns: + A Tensor of type string + """ +``` + +### Image Resizing and Cropping + +Operations for resizing and cropping images. + +```python { .api } +def resize(images, size, method=ResizeMethod.BILINEAR, preserve_aspect_ratio=False, + antialias=False, name=None): + """ + Resize images to size using the specified method. + + Parameters: + - images: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels] + - size: A 1-D int32 Tensor of 2 elements: new_height, new_width + - method: An image.ResizeMethod, or string equivalent + - preserve_aspect_ratio: Whether to preserve the aspect ratio + - antialias: Whether to use an anti-aliasing filter when downsampling an image + - name: A name for this operation + + Returns: + If images was 4-D, a 4-D float Tensor of shape [batch, new_height, new_width, channels] + """ + +def resize_with_pad(image, target_height, target_width, method=ResizeMethod.BILINEAR, antialias=False): + """ + Resizes and pads an image to a target width and height. + + Parameters: + - image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels] + - target_height: Target height + - target_width: Target width + - method: An image.ResizeMethod, or string equivalent + - antialias: Whether to use an anti-aliasing filter when downsampling an image + + Returns: + Resized and padded image + """ + +def crop_to_bounding_box(image, offset_height, offset_width, target_height, target_width): + """ + Crops an image to a specified bounding box. 
+ + Parameters: + - image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels] + - offset_height: Vertical coordinate of the top-left corner of the result in the input + - offset_width: Horizontal coordinate of the top-left corner of the result in the input + - target_height: Height of the result + - target_width: Width of the result + + Returns: + Cropped image(s) + """ + +def central_crop(image, central_fraction): + """ + Crop the central region of the image(s). + + Parameters: + - image: Either a 3-D float Tensor of shape [height, width, depth], or a 4-D Tensor of shape [batch_size, height, width, depth] + - central_fraction: float (0, 1], fraction of size to crop + + Returns: + 3-D / 4-D float Tensor, as per the input + """ + +def random_crop(value, size, seed=None, name=None): + """ + Randomly crops a tensor to a given size. + + Parameters: + - value: Input tensor to crop + - size: 1-D tensor with size the rank of value + - seed: A shape [2] Tensor, the seed to the random number generator + - name: A name for this operation + + Returns: + A cropped tensor of the same rank as value and shape size + """ +``` + +### Image Transformations + +Geometric transformations and spatial manipulations. + +```python { .api } +def flip_left_right(image): + """ + Flip an image horizontally (left to right). + + Parameters: + - image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels] + + Returns: + A tensor of the same type and shape as image + """ + +def flip_up_down(image): + """ + Flip an image vertically (upside down). + + Parameters: + - image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels] + + Returns: + A tensor of the same type and shape as image + """ + +def transpose(image, name=None): + """ + Transpose image(s) by swapping the height and width dimension. + + Parameters: + - image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels] + - name: A name for this operation + + Returns: + A tensor of the same type and shape as image, transposed + """ + +def rot90(image, k=1, name=None): + """ + Rotate image(s) counter-clockwise by 90 degrees. + + Parameters: + - image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels] + - k: A scalar integer tensor. The number of times the image is rotated by 90 degrees + - name: A name for this operation + + Returns: + A rotated tensor of the same type and shape as image + """ + +def random_flip_left_right(image, seed=None): + """ + Randomly flip an image horizontally (left to right). + + Parameters: + - image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels] + - seed: A Python integer. Used to create a random seed + + Returns: + A tensor of the same type and shape as image + """ + +def random_flip_up_down(image, seed=None): + """ + Randomly flips an image vertically (upside down). + + Parameters: + - image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels] + - seed: A Python integer. Used to create a random seed + + Returns: + A tensor of the same type and shape as image + """ +``` + +### Color Space and Enhancement + +Operations for color manipulation and image enhancement. + +```python { .api } +def rgb_to_grayscale(images, name=None): + """ + Converts one or more images from RGB to Grayscale. 
+ + Parameters: + - images: The RGB tensor to convert. The last dimension must have size 3 and should contain RGB values + - name: A name for the operation + + Returns: + The converted grayscale image(s) + """ + +def grayscale_to_rgb(images, name=None): + """ + Converts one or more images from Grayscale to RGB. + + Parameters: + - images: The Grayscale tensor to convert. Last dimension must be size 1 + - name: A name for the operation + + Returns: + The converted RGB image(s) + """ + +def rgb_to_hsv(images, name=None): + """ + Converts one or more images from RGB to HSV. + + Parameters: + - images: A Tensor. Must be one of the following types: half, bfloat16, float32, float64 + - name: A name for the operation + + Returns: + A Tensor. Has the same type as images + """ + +def hsv_to_rgb(images, name=None): + """ + Converts one or more images from HSV to RGB. + + Parameters: + - images: A Tensor. Must be one of the following types: half, bfloat16, float32, float64 + - name: A name for the operation + + Returns: + A Tensor. Has the same type as images + """ + +def adjust_brightness(image, delta): + """ + Adjust the brightness of RGB or Grayscale images. + + Parameters: + - image: RGB image or images to adjust + - delta: A scalar. Amount to add to the pixel values + + Returns: + The brightness-adjusted image(s) + """ + +def adjust_contrast(images, contrast_factor): + """ + Adjust contrast of RGB or grayscale images. + + Parameters: + - images: Images to adjust. At least 3-D + - contrast_factor: A float multiplier for adjusting contrast + + Returns: + The contrast-adjusted image or images + """ + +def adjust_hue(image, delta, name=None): + """ + Adjust hue of RGB images. + + Parameters: + - image: RGB image or images. The image hue is adjusted by converting the image(s) to HSV and rotating the hue channel (H) + - delta: float. How much to add to the hue channel + - name: A name for this operation + + Returns: + The hue-adjusted image or images + """ + +def adjust_saturation(image, saturation_factor, name=None): + """ + Adjust saturation of RGB images. + + Parameters: + - image: RGB image or images. The image saturation is adjusted by converting the image to HSV and multiplying the saturation (S) + - saturation_factor: float. Factor to multiply the saturation by + - name: A name for this operation + + Returns: + The saturation-adjusted image or images + """ + +def random_brightness(image, max_delta, seed=None): + """ + Adjust the brightness of images by a random factor. + + Parameters: + - image: An image or images to adjust + - max_delta: float, must be non-negative + - seed: A Python integer. Used to create a random seed + + Returns: + The brightness-adjusted image(s) + """ + +def random_contrast(image, lower, upper, seed=None): + """ + Adjust the contrast of an image or images by a random factor. + + Parameters: + - image: An image tensor with 3 or more dimensions + - lower: float. Lower bound for the random contrast factor + - upper: float. Upper bound for the random contrast factor + - seed: A Python integer. Used to create a random seed + + Returns: + The contrast-adjusted tensor + """ +``` + +### Image Quality and Metrics + +Operations for measuring image quality and computing metrics. + +```python { .api } +def psnr(a, b, max_val, name=None): + """ + Returns the Peak Signal-to-Noise Ratio between a and b. 
+
+    Parameters:
+    - a: First set of images
+    - b: Second set of images
+    - max_val: The dynamic range of the images (i.e., the difference between the maximum and minimum allowed values)
+    - name: Namespace to embed the computation in
+
+    Returns:
+    The PSNR between a and b. The returned tensor has type tf.float32 and shape [batch_size, 1]
+    """
+
+def ssim(img1, img2, max_val, filter_size=11, filter_sigma=1.5, k1=0.01, k2=0.03):
+    """
+    Computes SSIM index between img1 and img2.
+
+    Parameters:
+    - img1: First image batch
+    - img2: Second image batch
+    - max_val: The dynamic range of the images (i.e., the difference between the maximum and minimum allowed values)
+    - filter_size: Default value 11 (size of the Gaussian filter)
+    - filter_sigma: Default value 1.5 (width of the Gaussian filter)
+    - k1: Default value 0.01
+    - k2: Default value 0.03 (SSIM is less sensitive to K2 for lower values, so it should be larger than K1)
+
+    Returns:
+    A tensor containing an SSIM value for each image in batch
+    """
+
+def total_variation(images, name=None):
+    """
+    Calculate and return the total variation for one or more images.
+
+    Parameters:
+    - images: A Tensor. Must be one of the following types: half, float32, float64
+    - name: A name for the operation
+
+    Returns:
+    A Tensor. Has the same type as images
+    """
+```
+
+## Usage Examples
+
+```python
+import tensorflow as tf
+import numpy as np
+
+# Read and decode images
+image_string = tf.io.read_file('path/to/image.jpg')
+image = tf.image.decode_jpeg(image_string, channels=3)
+
+# Resize image
+resized_image = tf.image.resize(image, [224, 224])
+
+# Random augmentations
+augmented_image = tf.image.random_flip_left_right(image)
+augmented_image = tf.image.random_brightness(augmented_image, max_delta=0.1)
+augmented_image = tf.image.random_contrast(augmented_image, lower=0.8, upper=1.2)
+
+# Crop operations
+central_cropped = tf.image.central_crop(image, central_fraction=0.8)
+random_cropped = tf.image.random_crop(image, size=[100, 100, 3])
+
+# Color space conversions
+grayscale = tf.image.rgb_to_grayscale(image)
+# rgb_to_hsv expects a float image, so convert the uint8 image first
+hsv_image = tf.image.rgb_to_hsv(tf.image.convert_image_dtype(image, tf.float32))
+
+# Image processing pipeline for training
+def preprocess_image(image_path, label):
+    image = tf.io.read_file(image_path)
+    image = tf.image.decode_jpeg(image, channels=3)
+    image = tf.image.resize(image, [224, 224])
+    image = tf.cast(image, tf.float32) / 255.0
+
+    # Data augmentation
+    image = tf.image.random_flip_left_right(image)
+    image = tf.image.random_brightness(image, max_delta=0.1)
+    image = tf.image.random_contrast(image, lower=0.9, upper=1.1)
+
+    return image, label
+
+# Batch processing
+batch_size = 32
+image_paths = ["path1.jpg", "path2.jpg", ...]  # List of image paths
+labels = [0, 1, ...] 
# Corresponding labels + +dataset = tf.data.Dataset.from_tensor_slices((image_paths, labels)) +dataset = dataset.map(preprocess_image, num_parallel_calls=tf.data.AUTOTUNE) +dataset = dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE) + +# Quality metrics +img1 = tf.random.uniform([1, 256, 256, 3]) +img2 = tf.random.uniform([1, 256, 256, 3]) + +psnr_value = tf.image.psnr(img1, img2, max_val=1.0) +ssim_value = tf.image.ssim(img1, img2, max_val=1.0) + +print(f"PSNR: {psnr_value.numpy()}") +print(f"SSIM: {ssim_value.numpy()}") +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-tensorflow/docs/index.md b/.tessl/tiles/tessl/pypi-tensorflow/docs/index.md new file mode 100644 index 0000000..26f58b6 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-tensorflow/docs/index.md @@ -0,0 +1,328 @@ +# TensorFlow + +An end-to-end open source platform for machine learning that provides a comprehensive ecosystem of tools, libraries, and community resources for both research and production deployment. TensorFlow supports stable APIs for Python and C++, along with experimental APIs for other languages, enabling developers to build and deploy ML-powered applications efficiently across various hardware accelerators (CPUs, GPUs, TPUs) and distributed training environments. + +## Package Information + +- **Package Name**: tensorflow +- **Language**: Python (with C++ core) +- **Installation**: `pip install tensorflow` +- **Version**: 2.20.0 + +## Core Imports + +```python +import tensorflow as tf +``` + +For Keras high-level API: + +```python +from tensorflow import keras +``` + +For specific modules: + +```python +from tensorflow.keras import layers, models, optimizers +from tensorflow.data import Dataset +import tensorflow.nn as nn +``` + +## Basic Usage + +```python +import tensorflow as tf +import numpy as np + +# Create tensors +x = tf.constant([1.0, 2.0, 3.0]) +y = tf.constant([4.0, 5.0, 6.0]) + +# Basic operations +z = tf.add(x, y) +print(z) # tf.Tensor([5. 7. 9.], shape=(3,), dtype=float32) + +# Create a simple neural network with Keras +model = tf.keras.Sequential([ + tf.keras.layers.Dense(128, activation='relu', input_shape=(784,)), + tf.keras.layers.Dropout(0.2), + tf.keras.layers.Dense(10, activation='softmax') +]) + +# Compile the model +model.compile(optimizer='adam', + loss='sparse_categorical_crossentropy', + metrics=['accuracy']) + +# Create sample data +X_train = np.random.random((1000, 784)) +y_train = np.random.randint(10, size=(1000,)) + +# Train the model +model.fit(X_train, y_train, epochs=5, batch_size=32) +``` + +## Architecture + +TensorFlow's architecture is built around several key components: + +- **Tensors**: Multi-dimensional arrays that flow through computational graphs +- **Operations**: Mathematical computations performed on tensors +- **Graphs**: Computational graphs that define the flow of data and operations +- **Sessions**: Runtime environments for executing graphs (TF 1.x) or eager execution (TF 2.x) +- **Keras**: High-level API for building and training neural networks +- **Estimators**: High-level API for distributed training and evaluation + +The framework supports both eager execution (default in TF 2.x) for immediate operation evaluation and graph mode for optimized production deployment. + +## Capabilities + +### Core Tensor Operations + +Fundamental tensor creation, manipulation, and mathematical operations that form the foundation of TensorFlow computations. + +```python { .api } +def constant(value, dtype=None, shape=None, name="Const"): ... 
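+# A minimal usage sketch for the creation ops above (values are illustrative assumptions, not additional API):
+#   t = tf.constant([[1, 2], [3, 4]])        # 2x2 int32 tensor
+#   f = tf.cast(t, tf.float32)               # change dtype to float32
+#   flat = tf.reshape(f, [4])                # reshape to a vector of shape (4,)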
+def Variable(initial_value, trainable=None, validate_shape=True, + caching_device=None, name=None, variable_def=None, dtype=None, + import_scope=None, constraint=None, synchronization=tf.VariableSynchronization.AUTO, + aggregation=tf.VariableAggregation.NONE, shape=None, + experimental_enable_variable_lifting=True): ... +def convert_to_tensor(value, dtype=None, dtype_hint=None, name=None): ... +def cast(x, dtype, name=None): ... +def reshape(tensor, shape, name=None): ... +def transpose(a, perm=None, conjugate=False, name="transpose"): ... +def zeros(shape, dtype=tf.float32, name=None): ... +def ones(shape, dtype=tf.float32, name=None): ... +``` + +[Core Operations](./core.md) + +### Math Operations + +Comprehensive mathematical operations including arithmetic, trigonometric, linear algebra, and statistical functions. + +```python { .api } +def add(x, y, name=None): ... +def subtract(x, y, name=None): ... +def multiply(x, y, name=None): ... +def divide(x, y, name=None): ... +def matmul(a, b, transpose_a=False, transpose_b=False, adjoint_a=False, + adjoint_b=False, a_is_sparse=False, b_is_sparse=False, output_type=None, + grad_a=False, grad_b=False, name=None): ... +def reduce_sum(input_tensor, axis=None, keepdims=None, name=None): ... +def reduce_mean(input_tensor, axis=None, keepdims=None, name=None): ... +``` + +[Math Operations](./math.md) + +### Neural Network Operations + +Core neural network operations including activations, convolutions, pooling, normalization, and loss functions. + +```python { .api } +def relu(features, name=None): ... +def softmax(logits, axis=None, name=None): ... +def conv2d(input, filters, strides, padding, use_cudnn_on_gpu=True, data_format="NHWC", + dilations=[1,1,1,1], name=None): ... +def max_pool2d(input, ksize, strides, padding, data_format="NHWC", name=None): ... +def batch_normalization(x, mean, variance, offset, scale, variance_epsilon, name=None): ... +def softmax_cross_entropy_with_logits(labels, logits, axis=-1, name=None): ... +``` + +[Neural Network Operations](./nn.md) + +### Keras High-Level API + +High-level neural network building blocks including models, layers, optimizers, losses, and metrics for rapid prototyping and production. + +```python { .api } +class Sequential(Model): ... +class Model: ... +class Dense(Layer): ... +class Conv2D(Layer): ... +class LSTM(Layer): ... +class Adam(Optimizer): ... +class SGD(Optimizer): ... +``` + +[Keras API](./keras.md) + +### Data Processing + +Dataset creation, transformation, and preprocessing pipeline operations for efficient data handling and training workflows. + +```python { .api } +class Dataset: + @staticmethod + def from_tensor_slices(tensors, name=None): ... + @staticmethod + def from_tensors(tensors, name=None): ... + def map(self, map_func, num_parallel_calls=None, deterministic=None, name=None): ... + def batch(self, batch_size, drop_remainder=False, num_parallel_calls=None, + deterministic=None, name=None): ... + def shuffle(self, buffer_size, seed=None, reshuffle_each_iteration=None, name=None): ... + def repeat(self, count=None, name=None): ... +``` + +[Data Processing](./data.md) + +### Image Processing + +Comprehensive image manipulation, transformation, and computer vision operations for preprocessing and augmentation. + +```python { .api } +def decode_image(contents, channels=None, dtype=tf.uint8, name=None, expand_animations=True): ... +def resize(images, size, method=ResizeMethod.BILINEAR, preserve_aspect_ratio=False, + antialias=False, name=None): ... 
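+# A minimal usage sketch for the image ops above (the file name is an illustrative assumption):
+#   raw = tf.io.read_file("example.jpg")
+#   img = tf.image.decode_image(raw, channels=3)
+#   img = tf.image.resize(img, [224, 224])   # bilinear resize by default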
+def random_flip_left_right(image, seed=None): ... +def random_brightness(image, max_delta, seed=None): ... +def convert_image_dtype(image, dtype, saturate=False, name=None): ... +``` + +[Image Processing](./image.md) + +### Model Saving and Loading + +Complete model serialization, checkpointing, and deployment utilities for production and inference. + +```python { .api } +def save(obj, export_dir, signatures=None, options=None): ... +def load(export_dir, tags=None, options=None): ... +class Checkpoint: + def __init__(self, **kwargs): ... + def save(self, file_prefix, session=None): ... + def restore(self, save_path): ... +``` + +[Model Management](./saved-model.md) + +### Distribution Strategies + +Multi-device and multi-worker training strategies for scaling machine learning workloads across GPUs and TPUs. + +```python { .api } +class MirroredStrategy(Strategy): ... +class MultiWorkerMirroredStrategy(Strategy): ... +class TPUStrategy(Strategy): ... +class ParameterServerStrategy(Strategy): ... +``` + +[Distribution](./distribute.md) + +### Automatic Differentiation + +Gradient computation and automatic differentiation functionality for training neural networks. + +```python { .api } +class GradientTape: + def __init__(self, persistent=False, watch_accessed_variables=True): ... + def watch(self, tensor): ... + def gradient(self, target, sources, output_gradients=None, + unconnected_gradients=UnconnectedGradients.NONE): ... +def gradient(target, sources, grad_ys=None, name="gradients", + gate_gradients=False, aggregation_method=None, + stop_gradients=None, unconnected_gradients=UnconnectedGradients.NONE): ... +``` + +### Random Operations + +Random number generation and sampling operations for stochastic computations. + +```python { .api } +def random_normal(shape, mean=0.0, stddev=1.0, dtype=tf.float32, seed=None, name=None): ... +def random_uniform(shape, minval=0, maxval=None, dtype=tf.float32, seed=None, name=None): ... +def random_shuffle(value, seed=None, name=None): ... +def set_seed(seed): ... +``` + +### Configuration and System + +System configuration, device management, and runtime settings. + +```python { .api } +def list_physical_devices(device_type=None): ... +def list_logical_devices(device_type=None): ... +def experimental_set_memory_growth(device, enable): ... +def experimental_get_memory_info(device): ... +``` + +### Input/Output Operations + +File system operations, data serialization, and I/O utilities. + +```python { .api } +def read_file(filename, name=None): ... +def write_file(filename, contents, name=None): ... +def matching_files(pattern, name=None): ... +def decode_raw(input_bytes, out_type, little_endian=True, fixed_length=None, name=None): ... +``` + +## Types + +```python { .api } +class Tensor: + """Multi-dimensional array with uniform data type.""" + @property + def shape(self): ... + @property + def dtype(self): ... + @property + def device(self): ... + def numpy(self): ... + +class Variable(Tensor): + """Mutable tensor for storing model parameters.""" + def assign(self, value, use_locking=None, name=None, read_value=True): ... + def assign_add(self, delta, use_locking=None, name=None, read_value=True): ... + +class Operation: + """Computation node in a TensorFlow graph.""" + @property + def name(self): ... + @property + def type(self): ... + @property + def inputs(self): ... + @property + def outputs(self): ... 
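+# A minimal usage sketch of the Tensor and Variable types above (assumes eager execution):
+#   x = tf.constant([1.0, 2.0])              # Tensor: shape (2,), dtype float32
+#   v = tf.Variable(x)                       # mutable Variable initialized from x
+#   v.assign_add(tf.ones_like(x))            # in-place update; v.numpy() -> [2., 3.]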
+ +# Data types +DType = tf.DType +float16 = tf.float16 +float32 = tf.float32 +float64 = tf.float64 +int8 = tf.int8 +int16 = tf.int16 +int32 = tf.int32 +int64 = tf.int64 +uint8 = tf.uint8 +uint16 = tf.uint16 +uint32 = tf.uint32 +uint64 = tf.uint64 +bool = tf.bool +string = tf.string +complex64 = tf.complex64 +complex128 = tf.complex128 + +# Enumerations +class VariableSynchronization: + """Variable synchronization modes for distributed training.""" + NONE = "VariableSynchronization.NONE" + ON_WRITE = "VariableSynchronization.ON_WRITE" + ON_READ = "VariableSynchronization.ON_READ" + AUTO = "VariableSynchronization.AUTO" + +class VariableAggregation: + """Variable aggregation modes for distributed training.""" + NONE = "VariableAggregation.NONE" + SUM = "VariableAggregation.SUM" + MEAN = "VariableAggregation.MEAN" + ONLY_FIRST_REPLICA = "VariableAggregation.ONLY_FIRST_REPLICA" + +class UnconnectedGradients: + """Gradient computation modes for unconnected inputs.""" + NONE = "UnconnectedGradients.NONE" + ZERO = "UnconnectedGradients.ZERO" +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-tensorflow/docs/keras.md b/.tessl/tiles/tessl/pypi-tensorflow/docs/keras.md new file mode 100644 index 0000000..dbb8f74 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-tensorflow/docs/keras.md @@ -0,0 +1,409 @@ +# Keras High-Level API + +High-level neural network building blocks including models, layers, optimizers, losses, and metrics for rapid prototyping and production. Keras provides an intuitive interface for building and training deep learning models. + +## Capabilities + +### Models + +High-level model classes for building neural networks. + +```python { .api } +class Sequential(Model): + """ + Sequential groups a linear stack of layers into a tf.keras.Model. + + Methods: + - add(layer): Adds a layer instance on top of the layer stack + - pop(): Removes the last layer in the model + - compile(optimizer, loss, metrics): Configures the model for training + - fit(x, y, **kwargs): Trains the model for a fixed number of epochs + - evaluate(x, y, **kwargs): Returns the loss value & metrics values for the model + - predict(x, **kwargs): Generates output predictions for the input samples + """ + +class Model: + """ + Model groups layers into an object with training and inference features. + + Methods: + - compile(optimizer, loss, metrics): Configures the model for training + - fit(x, y, **kwargs): Trains the model for a fixed number of epochs + - evaluate(x, y, **kwargs): Returns the loss value & metrics values for the model + - predict(x, **kwargs): Generates output predictions for the input samples + - save(filepath, **kwargs): Saves the model to Tensorflow SavedModel or a single HDF5 file + - load_model(filepath, **kwargs): Loads a model saved via save() + - summary(): Prints a string summary of the network + - get_weights(): Retrieves the weights of the model + - set_weights(weights): Sets the weights of the model + """ + +def load_model(filepath, custom_objects=None, compile=True, options=None): + """ + Loads a model saved via model.save(). 
+ + Parameters: + - filepath: One of the following: String or pathlib.Path object, path to the saved model + - custom_objects: Optional dictionary mapping names to custom classes or functions + - compile: Boolean, whether to compile the model after loading + - options: Optional tf.saved_model.LoadOptions object that specifies options for loading from SavedModel + + Returns: + A Keras model instance + """ + +def save_model(model, filepath, overwrite=True, include_optimizer=True, save_format=None, + signatures=None, options=None, save_traces=True): + """ + Saves a model as a TensorFlow SavedModel or HDF5 file. + + Parameters: + - model: Keras model instance to be saved + - filepath: One of the following: String or pathlib.Path object, path where to save the model + - overwrite: Whether we should overwrite any existing model at the target location + - include_optimizer: If True, save optimizer's state together + - save_format: Either 'tf' or 'h5', indicating whether to save the model to Tensorflow SavedModel or HDF5 + - signatures: Signatures to save with the SavedModel + - options: Optional tf.saved_model.SaveOptions object that specifies options for saving to SavedModel + - save_traces: When enabled, the SavedModel will store the function traces for each layer + """ +``` + +### Core Layers + +Essential layer types for building neural networks. + +```python { .api } +class Dense(Layer): + """ + Just your regular densely-connected NN layer. + + Parameters: + - units: Positive integer, dimensionality of the output space + - activation: Activation function to use + - use_bias: Boolean, whether the layer uses a bias vector + - kernel_initializer: Initializer for the kernel weights matrix + - bias_initializer: Initializer for the bias vector + - kernel_regularizer: Regularizer function applied to the kernel weights matrix + - bias_regularizer: Regularizer function applied to the bias vector + - activity_regularizer: Regularizer function applied to the output of the layer + - kernel_constraint: Constraint function applied to the kernel weights matrix + - bias_constraint: Constraint function applied to the bias vector + """ + +class Dropout(Layer): + """ + Applies Dropout to the input. + + Parameters: + - rate: Float between 0 and 1. Fraction of the input units to drop + - noise_shape: 1D integer tensor representing the shape of the binary dropout mask + - seed: A Python integer to use as random seed + """ + +class Flatten(Layer): + """ + Flattens the input. Does not affect the batch size. + + Parameters: + - data_format: A string, one of channels_last (default) or channels_first + """ + +class Reshape(Layer): + """ + Reshapes an output to a certain shape. + + Parameters: + - target_shape: Target shape. Tuple of integers, does not include the samples dimension (batch size) + """ + +class Input: + """ + Input() is used to instantiate a Keras tensor. + + Parameters: + - shape: A shape tuple (integers), not including the batch size + - batch_size: optional static batch size (integer) + - name: An optional name string for the layer + - dtype: The data type expected by the input, as a string + - sparse: A boolean specifying whether the placeholder to be created is sparse + - tensor: Optional existing tensor to wrap into the Input layer + - ragged: A boolean specifying whether the placeholder to be created is ragged + """ + +class Lambda(Layer): + """ + Wraps arbitrary expressions as a Layer object. + + Parameters: + - function: The function to be evaluated. 
Takes input tensor as first argument + - output_shape: Expected output shape from function + - mask: Either None (no masking) or a callable with the same signature as the compute_mask layer method + - arguments: Optional dictionary of keyword arguments to be passed to the function + """ +``` + +### Convolutional Layers + +Layers for processing spatial data such as images. + +```python { .api } +class Conv2D(Layer): + """ + 2D convolution layer (e.g. spatial convolution over images). + + Parameters: + - filters: Integer, the dimensionality of the output space + - kernel_size: An integer or tuple/list of 2 integers, specifying the height and width of the 2D convolution window + - strides: An integer or tuple/list of 2 integers, specifying the strides of the convolution + - padding: one of "valid" or "same" (case-insensitive) + - data_format: A string, one of channels_last (default) or channels_first + - dilation_rate: an integer or tuple/list of 2 integers, specifying the dilation rate to use for dilated convolution + - groups: A positive integer specifying the number of groups in which the input is split + - activation: Activation function to use + - use_bias: Boolean, whether the layer uses a bias vector + - kernel_initializer: Initializer for the kernel weights matrix + - bias_initializer: Initializer for the bias vector + """ + +class Conv2DTranspose(Layer): + """ + Transposed convolution layer (sometimes called Deconvolution). + + Parameters: + - filters: Integer, the dimensionality of the output space + - kernel_size: An integer or tuple/list of 2 integers, specifying the height and width of the 2D convolution window + - strides: An integer or tuple/list of 2 integers, specifying the strides of the convolution + - padding: one of "valid" or "same" (case-insensitive) + - output_padding: An integer or tuple/list of 2 integers, specifying the amount of padding along the height and width + - data_format: A string, one of channels_last (default) or channels_first + - dilation_rate: an integer or tuple/list of 2 integers, specifying the dilation rate to use for dilated convolution + - activation: Activation function to use + - use_bias: Boolean, whether the layer uses a bias vector + """ + +class MaxPooling2D(Layer): + """ + Max pooling operation for 2D spatial data. + + Parameters: + - pool_size: integer or tuple of 2 integers, window size over which to take the maximum + - strides: Integer, tuple of 2 integers, or None. Strides values + - padding: One of "valid" or "same" (case-insensitive) + - data_format: A string, one of channels_last (default) or channels_first + """ + +class AveragePooling2D(Layer): + """ + Average pooling operation for 2D spatial data. + + Parameters: + - pool_size: integer or tuple of 2 integers, factors by which to downscale (vertical, horizontal) + - strides: Integer, tuple of 2 integers, or None + - padding: One of "valid" or "same" (case-insensitive) + - data_format: A string, one of channels_last (default) or channels_first + """ +``` + +### Recurrent Layers + +Layers for processing sequential data. + +```python { .api } +class LSTM(Layer): + """ + Long Short-Term Memory layer - Hochreiter 1997. 
+ + Parameters: + - units: Positive integer, dimensionality of the output space + - activation: Activation function to use + - recurrent_activation: Activation function to use for the recurrent step + - use_bias: Boolean (default True), whether the layer uses a bias vector + - kernel_initializer: Initializer for the kernel weights matrix + - recurrent_initializer: Initializer for the recurrent_kernel weights matrix + - bias_initializer: Initializer for the bias vector + - unit_forget_bias: Boolean (default True). If True, add 1 to the bias of the forget gate at initialization + - kernel_regularizer: Regularizer function applied to the kernel weights matrix + - recurrent_regularizer: Regularizer function applied to the recurrent_kernel weights matrix + - bias_regularizer: Regularizer function applied to the bias vector + - activity_regularizer: Regularizer function applied to the output of the layer + - kernel_constraint: Constraint function applied to the kernel weights matrix + - recurrent_constraint: Constraint function applied to the recurrent_kernel weights matrix + - bias_constraint: Constraint function applied to the bias vector + - dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the inputs + - recurrent_dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the recurrent state + - return_sequences: Boolean. Whether to return the last output in the output sequence, or the full sequence + - return_state: Boolean. Whether to return the last state in addition to the output + - go_backwards: Boolean (default False). If True, process the input sequence backwards and return the reversed sequence + - stateful: Boolean (default False). If True, the last state for each sample at index i in a batch will be used as initial state for the sample of index i in the following batch + - time_major: The shape format of the inputs and outputs. If True, the inputs and outputs will be in shape (timesteps, batch, ...), whereas in the False case, it will be (batch, timesteps, ...) + - unroll: Boolean (default False). If True, the network will be unrolled, else a symbolic loop will be used + """ + +class GRU(Layer): + """ + Gated Recurrent Unit - Cho et al. 2014. + + Parameters: + - units: Positive integer, dimensionality of the output space + - activation: Activation function to use + - recurrent_activation: Activation function to use for the recurrent step + - use_bias: Boolean, (default True), whether the layer uses a bias vector + - kernel_initializer: Initializer for the kernel weights matrix + - recurrent_initializer: Initializer for the recurrent_kernel weights matrix + - bias_initializer: Initializer for the bias vector + - kernel_regularizer: Regularizer function applied to the kernel weights matrix + - recurrent_regularizer: Regularizer function applied to the recurrent_kernel weights matrix + - bias_regularizer: Regularizer function applied to the bias vector + - activity_regularizer: Regularizer function applied to the output of the layer + - kernel_constraint: Constraint function applied to the kernel weights matrix + - recurrent_constraint: Constraint function applied to the recurrent_kernel weights matrix + - bias_constraint: Constraint function applied to the bias vector + - dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the inputs + - recurrent_dropout: Float between 0 and 1. 
Fraction of the units to drop for the linear transformation of the recurrent state + - return_sequences: Boolean. Whether to return the last output in the output sequence, or the full sequence + - return_state: Boolean. Whether to return the last state in addition to the output + - go_backwards: Boolean (default False). If True, process the input sequence backwards and return the reversed sequence + - stateful: Boolean (default False). If True, the last state for each sample at index i in a batch will be used as initial state for the sample of index i in the following batch + - unroll: Boolean (default False). If True, the network will be unrolled, else a symbolic loop will be used + - time_major: The shape format of the inputs and outputs + - reset_after: GRU convention (whether to apply reset gate after or before matrix multiplication) + """ + +class SimpleRNN(Layer): + """ + Fully-connected RNN where the output is to be fed back to input. + + Parameters: + - units: Positive integer, dimensionality of the output space + - activation: Activation function to use + - use_bias: Boolean, (default True), whether the layer uses a bias vector + - kernel_initializer: Initializer for the kernel weights matrix + - recurrent_initializer: Initializer for the recurrent_kernel weights matrix + - bias_initializer: Initializer for the bias vector + - kernel_regularizer: Regularizer function applied to the kernel weights matrix + - recurrent_regularizer: Regularizer function applied to the recurrent_kernel weights matrix + - bias_regularizer: Regularizer function applied to the bias vector + - activity_regularizer: Regularizer function applied to the output of the layer + - kernel_constraint: Constraint function applied to the kernel weights matrix + - recurrent_constraint: Constraint function applied to the recurrent_kernel weights matrix + - bias_constraint: Constraint function applied to the bias vector + - dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the inputs + - recurrent_dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the recurrent state + - return_sequences: Boolean. Whether to return the last output in the output sequence, or the full sequence + - return_state: Boolean. Whether to return the last state in addition to the output + - go_backwards: Boolean (default False). If True, process the input sequence backwards and return the reversed sequence + - stateful: Boolean (default False). If True, the last state for each sample at index i in a batch will be used as initial state for the sample of index i in the following batch + - unroll: Boolean (default False). If True, the network will be unrolled, else a symbolic loop will be used + """ +``` + +### Optimizers + +Optimization algorithms for training neural networks. + +```python { .api } +class Adam(Optimizer): + """ + Optimizer that implements the Adam algorithm. + + Parameters: + - learning_rate: A Tensor, floating point value, or a schedule that is a tf.keras.optimizers.schedules.LearningRateSchedule + - beta_1: A float value or a constant float tensor, or a callable that takes no arguments and returns the actual value to use + - beta_2: A float value or a constant float tensor, or a callable that takes no arguments and returns the actual value to use + - epsilon: A small constant for numerical stability + - amsgrad: Boolean. 
Whether to apply AMSGrad variant of this algorithm from the paper "On the Convergence of Adam and beyond" + - name: Optional name prefix for the operations created when applying gradients + """ + +class SGD(Optimizer): + """ + Gradient descent (with momentum) optimizer. + + Parameters: + - learning_rate: A Tensor, floating point value, or a schedule that is a tf.keras.optimizers.schedules.LearningRateSchedule + - momentum: float hyperparameter >= 0 that accelerates gradient descent in the relevant direction and dampens oscillations + - nesterov: boolean. Whether to apply Nesterov momentum + - name: Optional name prefix for the operations created when applying gradients + """ + +class RMSprop(Optimizer): + """ + Optimizer that implements the RMSprop algorithm. + + Parameters: + - learning_rate: A Tensor, floating point value, or a schedule that is a tf.keras.optimizers.schedules.LearningRateSchedule + - rho: Discounting factor for the history/coming gradient + - momentum: A scalar or a scalar Tensor + - epsilon: A small constant for numerical stability + - centered: Boolean. If True, gradients are normalized by the estimated variance of the gradient + - name: Optional name prefix for the operations created when applying gradients + """ +``` + +## Usage Examples + +```python +import tensorflow as tf +from tensorflow import keras +from tensorflow.keras import layers + +# Sequential model +model = keras.Sequential([ + layers.Dense(128, activation='relu', input_shape=(784,)), + layers.Dropout(0.2), + layers.Dense(10, activation='softmax') +]) + +# Functional API model +inputs = keras.Input(shape=(784,)) +x = layers.Dense(128, activation='relu')(inputs) +x = layers.Dropout(0.2)(x) +outputs = layers.Dense(10, activation='softmax')(x) +model = keras.Model(inputs=inputs, outputs=outputs) + +# Compile model +model.compile( + optimizer='adam', + loss='sparse_categorical_crossentropy', + metrics=['accuracy'] +) + +# Train model (example with dummy data) +import numpy as np +x_train = np.random.random((1000, 784)) +y_train = np.random.randint(10, size=(1000,)) + +model.fit(x_train, y_train, epochs=5, batch_size=32, validation_split=0.2) + +# Evaluate and predict +x_test = np.random.random((100, 784)) +y_test = np.random.randint(10, size=(100,)) + +loss, accuracy = model.evaluate(x_test, y_test) +predictions = model.predict(x_test) + +# Save and load model +model.save('my_model.h5') +loaded_model = keras.models.load_model('my_model.h5') + +# Convolutional model example +cnn_model = keras.Sequential([ + layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)), + layers.MaxPooling2D((2, 2)), + layers.Conv2D(64, (3, 3), activation='relu'), + layers.MaxPooling2D((2, 2)), + layers.Conv2D(64, (3, 3), activation='relu'), + layers.Flatten(), + layers.Dense(64, activation='relu'), + layers.Dense(10, activation='softmax') +]) + +# LSTM model example +lstm_model = keras.Sequential([ + layers.LSTM(50, return_sequences=True, input_shape=(10, 1)), + layers.LSTM(50, return_sequences=False), + layers.Dense(25), + layers.Dense(1) +]) +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-tensorflow/docs/math.md b/.tessl/tiles/tessl/pypi-tensorflow/docs/math.md new file mode 100644 index 0000000..63e58cf --- /dev/null +++ b/.tessl/tiles/tessl/pypi-tensorflow/docs/math.md @@ -0,0 +1,662 @@ +# Math Operations + +Comprehensive mathematical operations including arithmetic, trigonometric, linear algebra, and statistical functions. 
These operations provide the mathematical foundation for machine learning computations and numerical analysis. + +## Capabilities + +### Basic Arithmetic + +Fundamental arithmetic operations for tensor computations. + +```python { .api } +def add(x, y, name=None): + """ + Returns x + y element-wise. + + Parameters: + - x: A Tensor. Must be one of the following types: bfloat16, half, float32, float64, uint8, int8, int16, int32, int64, complex64, complex128, string + - y: A Tensor. Must have the same type as x + - name: A name for the operation + + Returns: + A Tensor. Has the same type as x + """ + +def subtract(x, y, name=None): + """ + Returns x - y element-wise. + + Parameters: + - x: A Tensor. Must be one of the following types: bfloat16, half, float32, float64, int8, int16, int32, int64, complex64, complex128 + - y: A Tensor. Must have the same type as x + - name: A name for the operation + + Returns: + A Tensor. Has the same type as x + """ + +def multiply(x, y, name=None): + """ + Returns x * y element-wise. + + Parameters: + - x: A Tensor. Must be one of the following types: bfloat16, half, float32, float64, uint8, int8, uint16, int16, int32, int64, complex64, complex128 + - y: A Tensor. Must have the same type as x + - name: A name for the operation + + Returns: + A Tensor. Has the same type as x + """ + +def divide(x, y, name=None): + """ + Computes Python style division of x by y. + + Parameters: + - x: A Tensor. Must be one of the following types: bfloat16, half, float32, float64, uint8, int8, uint16, int16, int32, int64, complex64, complex128 + - y: A Tensor. Must have the same type as x + - name: A name for the operation + + Returns: + A Tensor. Has the same type as x + """ + +def floordiv(x, y, name=None): + """ + Divides x / y elementwise, rounding toward the most negative integer. + + Parameters: + - x: A Tensor. Must be one of the following types: bfloat16, half, float32, float64, uint8, int8, uint16, int16, int32, int64 + - y: A Tensor. Must have the same type as x + - name: A name for the operation + + Returns: + A Tensor. Has the same type as x + """ + +def mod(x, y, name=None): + """ + Returns element-wise remainder of division. + + Parameters: + - x: A Tensor. Must be one of the following types: int8, int16, int32, int64, uint8, uint16, uint32, uint64, bfloat16, half, float32, float64 + - y: A Tensor. Must have the same type as x + - name: A name for the operation + + Returns: + A Tensor. Has the same type as x + """ + +def pow(x, y, name=None): + """ + Computes the power of one value to another. + + Parameters: + - x: A Tensor of type float16, float32, float64, int32, int64, complex64, or complex128 + - y: A Tensor of type float16, float32, float64, int32, int64, complex64, or complex128 + - name: A name for the operation + + Returns: + A Tensor + """ +``` + +### Mathematical Functions + +Common mathematical functions and operations. + +```python { .api } +def abs(x, name=None): + """ + Computes the absolute value of a tensor. + + Parameters: + - x: A Tensor or SparseTensor of type float16, float32, float64, int8, int16, int32, int64, complex64 or complex128 + - name: A name for the operation + + Returns: + A Tensor or SparseTensor the same size, type, and sparsity as x with absolute values + """ + +def sign(x, name=None): + """ + Returns an element-wise indication of the sign of a number. + + Parameters: + - x: A Tensor. 
Must be one of the following types: bfloat16, half, float32, float64, int8, int16, int32, int64, complex64, complex128 + - name: A name for the operation + + Returns: + A Tensor. Has the same type as x + """ + +def sqrt(x, name=None): + """ + Computes element-wise square root of the input tensor. + + Parameters: + - x: A Tensor. Must be one of the following types: bfloat16, half, float32, float64, complex64, complex128 + - name: A name for the operation + + Returns: + A Tensor. Has the same type as x + """ + +def square(x, name=None): + """ + Computes square of x element-wise. + + Parameters: + - x: A Tensor. Must be one of the following types: bfloat16, half, float32, float64, int8, int16, int32, int64, complex64, complex128 + - name: A name for the operation + + Returns: + A Tensor. Has the same type as x + """ + +def exp(x, name=None): + """ + Computes exponential of x element-wise. + + Parameters: + - x: A Tensor. Must be one of the following types: bfloat16, half, float32, float64, complex64, complex128 + - name: A name for the operation + + Returns: + A Tensor. Has the same type as x + """ + +def log(x, name=None): + """ + Computes natural logarithm of x element-wise. + + Parameters: + - x: A Tensor. Must be one of the following types: bfloat16, half, float32, float64, complex64, complex128 + - name: A name for the operation + + Returns: + A Tensor. Has the same type as x + """ + +def log10(x, name=None): + """ + Computes element-wise log base 10 of x. + + Parameters: + - x: A Tensor. Must be one of the following types: bfloat16, half, float32, float64, complex64, complex128 + - name: A name for the operation + + Returns: + A Tensor. Has the same type as x + """ +``` + +### Trigonometric Functions + +Trigonometric and hyperbolic functions for mathematical computations. + +```python { .api } +def sin(x, name=None): + """ + Computes sine of x element-wise. + + Parameters: + - x: A Tensor. Must be one of the following types: bfloat16, half, float32, float64, complex64, complex128 + - name: A name for the operation + + Returns: + A Tensor. Has the same type as x + """ + +def cos(x, name=None): + """ + Computes cosine of x element-wise. + + Parameters: + - x: A Tensor. Must be one of the following types: bfloat16, half, float32, float64, complex64, complex128 + - name: A name for the operation + + Returns: + A Tensor. Has the same type as x + """ + +def tan(x, name=None): + """ + Computes tan of x element-wise. + + Parameters: + - x: A Tensor. Must be one of the following types: bfloat16, half, float32, float64, complex64, complex128 + - name: A name for the operation + + Returns: + A Tensor. Has the same type as x + """ + +def asin(x, name=None): + """ + Computes the trignometric inverse sine of x element-wise. + + Parameters: + - x: A Tensor. Must be one of the following types: bfloat16, half, float32, float64, int8, int16, int32, int64, complex64, complex128 + - name: A name for the operation + + Returns: + A Tensor. Has the same type as x + """ + +def acos(x, name=None): + """ + Computes acos of x element-wise. + + Parameters: + - x: A Tensor. Must be one of the following types: bfloat16, half, float32, float64, int8, int16, int32, int64, complex64, complex128 + - name: A name for the operation + + Returns: + A Tensor. Has the same type as x + """ + +def atan(x, name=None): + """ + Computes the trignometric inverse tangent of x element-wise. + + Parameters: + - x: A Tensor. 
Must be one of the following types: bfloat16, half, float32, float64, int8, int16, int32, int64, complex64, complex128 + - name: A name for the operation + + Returns: + A Tensor. Has the same type as x + """ + +def sinh(x, name=None): + """ + Computes hyperbolic sine of x element-wise. + + Parameters: + - x: A Tensor. Must be one of the following types: bfloat16, half, float32, float64, complex64, complex128 + - name: A name for the operation + + Returns: + A Tensor. Has the same type as x + """ + +def cosh(x, name=None): + """ + Computes hyperbolic cosine of x element-wise. + + Parameters: + - x: A Tensor. Must be one of the following types: bfloat16, half, float32, float64, complex64, complex128 + - name: A name for the operation + + Returns: + A Tensor. Has the same type as x + """ + +def tanh(x, name=None): + """ + Computes hyperbolic tangent of x element-wise. + + Parameters: + - x: A Tensor. Must be one of the following types: bfloat16, half, float32, float64, complex64, complex128 + - name: A name for the operation + + Returns: + A Tensor. Has the same type as x + """ +``` + +### Linear Algebra + +Matrix operations and linear algebra functions. + +```python { .api } +def matmul(a, b, transpose_a=False, transpose_b=False, adjoint_a=False, + adjoint_b=False, a_is_sparse=False, b_is_sparse=False, output_type=None, + grad_a=False, grad_b=False, name=None): + """ + Multiplies matrix a by matrix b, producing a * b. + + Parameters: + - a: A Tensor of type float16, float32, float64, int32, int64, complex64, complex128 and rank > 1 + - b: A Tensor with same type and rank as a + - transpose_a: If True, a is transposed before multiplication + - transpose_b: If True, b is transposed before multiplication + - adjoint_a: If True, a is conjugated and transposed before multiplication + - adjoint_b: If True, b is conjugated and transposed before multiplication + - a_is_sparse: If True, a is treated as a sparse matrix + - b_is_sparse: If True, b is treated as a sparse matrix + - output_type: The output type of the operation (float16, float32, etc.) + - grad_a: Whether to use gradient optimized version for matrix a + - grad_b: Whether to use gradient optimized version for matrix b + - name: Name for the operation + + Returns: + A Tensor of the same type as a and b where each inner-most matrix is the product of the corresponding matrices in a and b + """ + +def transpose(a, perm=None, conjugate=False, name="transpose"): + """ + Transposes a. + + Parameters: + - a: A Tensor + - perm: A permutation of the dimensions of a + - conjugate: Setting it to True is mathematically equivalent to tf.math.conj(tf.transpose(input)) + - name: A name for the operation + + Returns: + A transposed Tensor + """ + +def trace(x, name=None): + """ + Compute the trace of a tensor x. + + Parameters: + - x: A Tensor. Must be one of the following types: bfloat16, half, float32, float64, int32, int64, complex64, complex128 + - name: A name for the operation + + Returns: + A Tensor. Has the same type as x + """ + +def det(input, name=None): + """ + Computes the determinant of one or more square matrices. + + Parameters: + - input: A Tensor of type float32, float64, complex64 or complex128 of shape [..., M, M] + - name: A name for the operation + + Returns: + A Tensor of the same type as input with shape [...] + """ + +def inv(input, adjoint=False, name=None): + """ + Computes the inverse of one or more square invertible matrices or their adjoints (conjugate transposes). + + Parameters: + - input: A Tensor. 
Must be one of the following types: float64, float32, half, complex64, complex128 + - adjoint: An optional bool. Defaults to False + - name: A name for the operation + + Returns: + A Tensor. Has the same type as input + """ + +def norm(tensor, ord='euclidean', axis=None, keepdims=None, name=None): + """ + Computes the norm of vectors, matrices, and tensors. + + Parameters: + - tensor: A Tensor + - ord: Order of the norm. Supported values are 'fro', 'euclidean', 1, 2, np.inf and any positive real number yielding the corresponding p-norm + - axis: If axis is None (the default), the input is considered a vector and a single vector norm is computed over the entire set of values in the tensor + - keepdims: If True, the axis indicated in axis are kept with size 1 + - name: The name of the op + + Returns: + A Tensor with the same type as tensor, containing the vector or matrix norms + """ +``` + +### Reduction Operations + +Operations that reduce tensor dimensions through aggregation. + +```python { .api } +def reduce_sum(input_tensor, axis=None, keepdims=None, name=None): + """ + Computes the sum of elements across dimensions of a tensor. + + Parameters: + - input_tensor: The tensor to reduce. Should have numeric type + - axis: The dimensions to reduce. If None (the default), reduces all dimensions + - keepdims: If true, retains reduced dimensions with length 1 + - name: A name for the operation + + Returns: + The reduced tensor + """ + +def reduce_mean(input_tensor, axis=None, keepdims=None, name=None): + """ + Computes the mean of elements across dimensions of a tensor. + + Parameters: + - input_tensor: The tensor to reduce. Should have numeric type + - axis: The dimensions to reduce. If None (the default), reduces all dimensions + - keepdims: If true, retains reduced dimensions with length 1 + - name: A name for the operation + + Returns: + The reduced tensor + """ + +def reduce_max(input_tensor, axis=None, keepdims=None, name=None): + """ + Computes the maximum of elements across dimensions of a tensor. + + Parameters: + - input_tensor: The tensor to reduce. Should have numeric type + - axis: The dimensions to reduce. If None (the default), reduces all dimensions + - keepdims: If true, retains reduced dimensions with length 1 + - name: A name for the operation + + Returns: + The reduced tensor + """ + +def reduce_min(input_tensor, axis=None, keepdims=None, name=None): + """ + Computes the minimum of elements across dimensions of a tensor. + + Parameters: + - input_tensor: The tensor to reduce. Should have numeric type + - axis: The dimensions to reduce. If None (the default), reduces all dimensions + - keepdims: If true, retains reduced dimensions with length 1 + - name: A name for the operation + + Returns: + The reduced tensor + """ + +def reduce_prod(input_tensor, axis=None, keepdims=None, name=None): + """ + Computes the product of elements across dimensions of a tensor. + + Parameters: + - input_tensor: The tensor to reduce. Should have numeric type + - axis: The dimensions to reduce. If None (the default), reduces all dimensions + - keepdims: If true, retains reduced dimensions with length 1 + - name: A name for the operation + + Returns: + The reduced tensor + """ + +def reduce_all(input_tensor, axis=None, keepdims=None, name=None): + """ + Computes the "logical and" of elements across dimensions of a tensor. + + Parameters: + - input_tensor: The boolean tensor to reduce + - axis: The dimensions to reduce. 
If None (the default), reduces all dimensions + - keepdims: If true, retains reduced dimensions with length 1 + - name: A name for the operation + + Returns: + The reduced tensor + """ + +def reduce_any(input_tensor, axis=None, keepdims=None, name=None): + """ + Computes the "logical or" of elements across dimensions of a tensor. + + Parameters: + - input_tensor: The boolean tensor to reduce + - axis: The dimensions to reduce. If None (the default), reduces all dimensions + - keepdims: If true, retains reduced dimensions with length 1 + - name: A name for the operation + + Returns: + The reduced tensor + """ +``` + +### Element-wise Comparisons + +Operations for comparing tensor elements. + +```python { .api } +def equal(x, y, name=None): + """ + Returns the truth value of (x == y) element-wise. + + Parameters: + - x: A Tensor + - y: A Tensor. Must have the same type as x + - name: A name for the operation + + Returns: + A Tensor of type bool + """ + +def not_equal(x, y, name=None): + """ + Returns the truth value of (x != y) element-wise. + + Parameters: + - x: A Tensor + - y: A Tensor. Must have the same type as x + - name: A name for the operation + + Returns: + A Tensor of type bool + """ + +def less(x, y, name=None): + """ + Returns the truth value of (x < y) element-wise. + + Parameters: + - x: A Tensor + - y: A Tensor. Must have the same type as x + - name: A name for the operation + + Returns: + A Tensor of type bool + """ + +def less_equal(x, y, name=None): + """ + Returns the truth value of (x <= y) element-wise. + + Parameters: + - x: A Tensor + - y: A Tensor. Must have the same type as x + - name: A name for the operation + + Returns: + A Tensor of type bool + """ + +def greater(x, y, name=None): + """ + Returns the truth value of (x > y) element-wise. + + Parameters: + - x: A Tensor + - y: A Tensor. Must have the same type as x + - name: A name for the operation + + Returns: + A Tensor of type bool + """ + +def greater_equal(x, y, name=None): + """ + Returns the truth value of (x >= y) element-wise. + + Parameters: + - x: A Tensor + - y: A Tensor. Must have the same type as x + - name: A name for the operation + + Returns: + A Tensor of type bool + """ + +def maximum(x, y, name=None): + """ + Returns the max of x and y (i.e. x > y ? x : y) element-wise. + + Parameters: + - x: A Tensor + - y: A Tensor. Must have the same type as x + - name: A name for the operation + + Returns: + A Tensor. Has the same type as x + """ + +def minimum(x, y, name=None): + """ + Returns the min of x and y (i.e. x < y ? x : y) element-wise. + + Parameters: + - x: A Tensor + - y: A Tensor. Must have the same type as x + - name: A name for the operation + + Returns: + A Tensor. 
Has the same type as x + """ +``` + +## Usage Examples + +```python +import tensorflow as tf + +# Basic arithmetic +a = tf.constant([1.0, 2.0, 3.0]) +b = tf.constant([4.0, 5.0, 6.0]) + +sum_result = tf.add(a, b) # [5.0, 7.0, 9.0] +diff_result = tf.subtract(b, a) # [3.0, 3.0, 3.0] +prod_result = tf.multiply(a, b) # [4.0, 10.0, 18.0] +div_result = tf.divide(b, a) # [4.0, 2.5, 2.0] + +# Mathematical functions +sqrt_result = tf.sqrt(a) # [1.0, 1.414, 1.732] +exp_result = tf.exp(a) # [2.718, 7.389, 20.086] +log_result = tf.log(b) # [1.386, 1.609, 1.792] + +# Trigonometric functions +sin_result = tf.sin(a) # [0.841, 0.909, 0.141] +cos_result = tf.cos(a) # [0.540, -0.416, -0.990] + +# Matrix operations +matrix_a = tf.constant([[1.0, 2.0], [3.0, 4.0]]) +matrix_b = tf.constant([[5.0, 6.0], [7.0, 8.0]]) + +matmul_result = tf.matmul(matrix_a, matrix_b) # [[19.0, 22.0], [43.0, 50.0]] +transpose_result = tf.transpose(matrix_a) # [[1.0, 3.0], [2.0, 4.0]] + +# Reduction operations +tensor = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]) +sum_all = tf.reduce_sum(tensor) # 21.0 +sum_axis0 = tf.reduce_sum(tensor, axis=0) # [5.0, 7.0, 9.0] +mean_result = tf.reduce_mean(tensor) # 3.5 +max_result = tf.reduce_max(tensor) # 6.0 + +# Comparisons +x = tf.constant([1, 2, 3]) +y = tf.constant([1, 4, 2]) + +eq_result = tf.equal(x, y) # [True, False, False] +gt_result = tf.greater(x, y) # [False, False, True] +max_xy = tf.maximum(x, y) # [1, 4, 3] +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-tensorflow/docs/nn.md b/.tessl/tiles/tessl/pypi-tensorflow/docs/nn.md new file mode 100644 index 0000000..287991d --- /dev/null +++ b/.tessl/tiles/tessl/pypi-tensorflow/docs/nn.md @@ -0,0 +1,498 @@ +# Neural Network Operations + +Core neural network operations including activations, convolutions, pooling, normalization, and loss functions. These operations provide the fundamental building blocks for constructing and training neural networks. + +## Capabilities + +### Activation Functions + +Non-linear activation functions that introduce non-linearity into neural networks. + +```python { .api } +def relu(features, name=None): + """ + Computes rectified linear: max(features, 0). + + Parameters: + - features: A Tensor. Must be one of the following types: float32, float64, int32, uint8, int16, int8, int64, bfloat16, uint16, half, uint32, uint64 + - name: A name for the operation + + Returns: + A Tensor. Has the same type as features + """ + +def relu6(features, name=None): + """ + Computes Rectified Linear 6: min(max(features, 0), 6). + + Parameters: + - features: A Tensor with type float, double, int32, uint8, int16, or int8 + - name: A name for the operation + + Returns: + A Tensor with the same type as features + """ + +def elu(features, name=None): + """ + Computes exponential linear: exp(features) - 1 if < 0, features otherwise. + + Parameters: + - features: A Tensor. Must be one of the following types: half, bfloat16, float32, float64 + - name: A name for the operation + + Returns: + A Tensor. Has the same type as features + """ + +def sigmoid(x, name=None): + """ + Computes sigmoid of x element-wise. + + Parameters: + - x: A Tensor with type float16, float32, float64, complex64, or complex128 + - name: A name for the operation + + Returns: + A Tensor with the same type as x + """ + +def tanh(x, name=None): + """ + Computes hyperbolic tangent of x element-wise. + + Parameters: + - x: A Tensor. 
Must be one of the following types: bfloat16, half, float32, float64, complex64, complex128 + - name: A name for the operation + + Returns: + A Tensor. Has the same type as x + """ + +def softmax(logits, axis=None, name=None): + """ + Computes softmax activations. + + Parameters: + - logits: A non-empty Tensor. Must be one of the following types: half, bfloat16, float32, float64 + - axis: The dimension softmax would be performed on. The default is -1 which indicates the last dimension + - name: A name for the operation + + Returns: + A Tensor. Has the same type and shape as logits + """ + +def log_softmax(logits, axis=None, name=None): + """ + Computes log softmax activations. + + Parameters: + - logits: A non-empty Tensor. Must be one of the following types: half, bfloat16, float32, float64 + - axis: The dimension softmax would be performed on. The default is -1 which indicates the last dimension + - name: A name for the operation + + Returns: + A Tensor. Has the same type and shape as logits + """ + +def leaky_relu(features, alpha=0.2, name=None): + """ + Compute the Leaky ReLU activation function. + + Parameters: + - features: A Tensor representing preactivation values. Must be one of the following types: float16, float32, float64, int32, int64 + - alpha: Slope of the activation function at x < 0 + - name: A name for the operation + + Returns: + The activation value + """ + +def gelu(features, approximate=False, name=None): + """ + Compute the Gaussian Error Linear Unit (GELU) activation function. + + Parameters: + - features: A Tensor representing preactivation values + - approximate: An optional bool. Defaults to False. Whether to enable approximation + - name: A name for the operation + + Returns: + A Tensor with the same type as features + """ + +def swish(features, name=None): + """ + Computes the Swish activation function: features * sigmoid(features). + + Parameters: + - features: A Tensor representing preactivation values + - name: A name for the operation + + Returns: + The activation value + """ +``` + +### Convolution Operations + +Convolution operations for processing spatial data like images. + +```python { .api } +def conv2d(input, filters, strides, padding, use_cudnn_on_gpu=True, data_format="NHWC", + dilations=[1,1,1,1], name=None): + """ + Computes a 2-D convolution given 4-D input and filter tensors. + + Parameters: + - input: A Tensor. Must be one of the following types: half, bfloat16, float32, float64 + - filters: A Tensor. Must have the same type as input + - strides: An int or list of ints that has length 1, 2 or 4 + - padding: Either the string "SAME" or "VALID" indicating the type of padding algorithm to use + - use_cudnn_on_gpu: An optional bool. Defaults to True. Whether to use cuDNN on GPU when available + - data_format: An optional string from: "NHWC", "NCHW". Defaults to "NHWC" + - dilations: A list of ints. Defaults to [1, 1, 1, 1]. The dilation factor for each dimension of input + - name: A name for the operation + + Returns: + A Tensor. Has the same type as input + """ + +def conv2d_transpose(input, filters, output_shape, strides, padding="SAME", + data_format="NHWC", dilations=None, name=None): + """ + The transpose of conv2d. 
+ + Parameters: + - input: A 4-D Tensor of type float and shape [batch, height, width, in_channels] for NHWC data format + - filters: A 4-D Tensor with the same type as input and shape [height, width, output_channels, in_channels] + - output_shape: A 1-D Tensor representing the output shape of the deconvolution op + - strides: An int or list of ints that has length 1, 2 or 4 + - padding: A string, either 'VALID' or 'SAME' + - data_format: A string. 'NHWC' and 'NCHW' are supported + - dilations: An int or list of ints that has length 1, 2 or 4, defaults to 1 + - name: Optional name for the returned tensor + + Returns: + A Tensor with the same type as input + """ + +def depthwise_conv2d(input, filter, strides, padding, data_format=None, + dilations=None, name=None): + """ + Depthwise 2-D convolution. + + Parameters: + - input: 4-D with shape according to data_format + - filter: 4-D with shape [filter_height, filter_width, in_channels, channel_multiplier] + - strides: 1-D of size 4. The stride of the sliding window for each dimension of input + - padding: Controls how to pad the image before applying the convolution + - data_format: The data format for input. Either "NHWC" (default) or "NCHW" + - dilations: 1-D of size 2. The dilation rate in which we sample input values + - name: A name for this operation + + Returns: + A 4-D Tensor with shape according to data_format + """ + +def separable_conv2d(input, depthwise_filter, pointwise_filter, strides, + padding, data_format=None, dilations=None, name=None): + """ + 2-D convolution with separable filters. + + Parameters: + - input: 4-D Tensor with shape according to data_format + - depthwise_filter: 4-D Tensor with shape [filter_height, filter_width, in_channels, channel_multiplier] + - pointwise_filter: 4-D Tensor with shape [1, 1, channel_multiplier * in_channels, out_channels] + - strides: 1-D of size 4. The stride of the sliding window for each dimension of input + - padding: Controls how to pad the image before applying the convolution + - data_format: The data format for input. Either "NHWC" (default) or "NCHW" + - dilations: 1-D of size 2. The dilation rate in which we sample input values + - name: A name for this operation + + Returns: + A 4-D Tensor with shape according to data_format + """ +``` + +### Pooling Operations + +Pooling operations for downsampling and feature extraction. + +```python { .api } +def max_pool2d(input, ksize, strides, padding, data_format="NHWC", name=None): + """ + Performs the max pooling on the input. + + Parameters: + - input: A 4-D Tensor of the format specified by data_format + - ksize: An int or list of ints that has length 1, 2 or 4 + - strides: An int or list of ints that has length 1, 2 or 4 + - padding: Either the string "SAME" or "VALID" indicating the type of padding algorithm to use + - data_format: A string. 'NHWC' and 'NCHW' are supported + - name: Optional name for the operation + + Returns: + A Tensor of format specified by data_format + """ + +def avg_pool2d(input, ksize, strides, padding, data_format="NHWC", name=None): + """ + Performs the average pooling on the input. + + Parameters: + - input: A 4-D Tensor of shape [batch, height, width, channels] and type float32, float64, qint8, quint8, or qint32 + - ksize: An int or list of ints that has length 1, 2 or 4 + - strides: An int or list of ints that has length 1, 2 or 4 + - padding: A string, either 'VALID' or 'SAME' + - data_format: A string. 
'NHWC' and 'NCHW' are supported + - name: Optional name for the operation + + Returns: + A Tensor with the same type as input + """ + +def global_max_pool2d(input, data_format="NHWC", name=None): + """ + Performs global max pooling on the input. + + Parameters: + - input: A 4-D Tensor of the format specified by data_format + - data_format: A string. 'NHWC' and 'NCHW' are supported + - name: Optional name for the operation + + Returns: + A Tensor of format specified by data_format + """ + +def global_avg_pool2d(input, data_format="NHWC", name=None): + """ + Performs global average pooling on the input. + + Parameters: + - input: A 4-D Tensor of the format specified by data_format + - data_format: A string. 'NHWC' and 'NCHW' are supported + - name: Optional name for the operation + + Returns: + A Tensor of format specified by data_format + """ +``` + +### Normalization + +Normalization operations for training stability and performance. + +```python { .api } +def batch_normalization(x, mean, variance, offset, scale, variance_epsilon, name=None): + """ + Batch normalization. + + Parameters: + - x: Input Tensor + - mean: A mean Tensor + - variance: A variance Tensor + - offset: An offset Tensor, often denoted β in equations, or None + - scale: A scale Tensor, often denoted γ in equations, or None + - variance_epsilon: A small float number to avoid dividing by 0 + - name: A name for this operation + + Returns: + the normalized, scaled, offset tensor + """ + +def layer_normalization(inputs, begin_norm_axis=1, begin_params_axis=-1, name=None): + """ + Applies layer normalization. + + Parameters: + - inputs: A tensor with 2 or more dimensions, where the first dimension has batch_size + - begin_norm_axis: The first normalization dimension: normalization will be performed along dimensions begin_norm_axis : rank(inputs) + - begin_params_axis: Part of the standard interface, unused + - name: A name for this operation + + Returns: + A normalized Tensor with the same shape as inputs + """ + +def local_response_normalization(input, depth_radius=5, bias=1, alpha=1, beta=0.5, name=None): + """ + Local Response Normalization. + + Parameters: + - input: A Tensor. Must be one of the following types: half, bfloat16, float32 + - depth_radius: An optional int. Defaults to 5. 0-D. Half-width of the 1-D normalization window + - bias: An optional float. Defaults to 1. An offset (usually positive to avoid dividing by 0) + - alpha: An optional float. Defaults to 1. A scale factor, usually positive + - beta: An optional float. Defaults to 0.5. An exponent + - name: A name for the operation + + Returns: + A Tensor. Has the same type as input + """ + +def l2_normalize(x, axis=None, epsilon=1e-12, name=None): + """ + Normalizes along dimension axis using an L2 norm. + + Parameters: + - x: A Tensor + - axis: Dimension along which to normalize. A scalar or a vector of integers + - epsilon: A lower bound value for the norm. Will use sqrt(epsilon) as the divisor if norm < sqrt(epsilon) + - name: A name for this operation + + Returns: + A Tensor with the same shape as x + """ +``` + +### Loss Functions + +Loss functions for training neural networks. + +```python { .api } +def softmax_cross_entropy_with_logits(labels, logits, axis=-1, name=None): + """ + Computes softmax cross entropy between logits and labels. + + Parameters: + - labels: Each vector along the class dimension should hold a valid probability distribution + - logits: Per-label activations, typically a linear output + - axis: The class dimension. 
Defaulted to -1 which is the last dimension + - name: A name for the operation + + Returns: + A Tensor that contains the softmax cross entropy loss + """ + +def sparse_softmax_cross_entropy_with_logits(labels, logits, name=None): + """ + Computes sparse softmax cross entropy between logits and labels. + + Parameters: + - labels: Tensor of shape [...] and dtype int32 or int64 + - logits: Per-label activations of shape [..., num_classes] and dtype float16, float32, or float64 + - name: A name for the operation + + Returns: + A Tensor of the same shape as labels and of the same type as logits with the softmax cross entropy loss + """ + +def sigmoid_cross_entropy_with_logits(labels, logits, name=None): + """ + Computes sigmoid cross entropy given logits. + + Parameters: + - labels: A Tensor of the same type and shape as logits + - logits: A Tensor of type float32 or float64 + - name: A name for the operation + + Returns: + A Tensor of the same shape as logits with the componentwise logistic losses + """ + +def l2_loss(t, name=None): + """ + Computes half the L2 norm of a tensor without the sqrt. + + Parameters: + - t: A Tensor. Must be one of the following types: half, bfloat16, float32, float64 + - name: A name for the operation + + Returns: + A Tensor. Has the same type as t + """ + +def mean_squared_error(y_true, y_pred): + """ + Computes the mean squared error between labels and predictions. + + Parameters: + - y_true: Ground truth values + - y_pred: The predicted values + + Returns: + Mean squared error values + """ + +def mean_absolute_error(y_true, y_pred): + """ + Computes the mean absolute error between labels and predictions. + + Parameters: + - y_true: Ground truth values + - y_pred: The predicted values + + Returns: + Mean absolute error values + """ +``` + +### Dropout and Regularization + +Operations for regularization and preventing overfitting. + +```python { .api } +def dropout(x, rate, noise_shape=None, seed=None, training=None, name=None): + """ + Computes dropout: randomly sets elements to zero to prevent overfitting. + + Parameters: + - x: A floating point tensor + - rate: A scalar Tensor with the same type as x. The probability that each element is discarded + - noise_shape: A 1-D integer Tensor, representing the shape for randomly generated keep/drop flags + - seed: A Python integer. Used to create random seeds + - training: Either a Python boolean, or a TensorFlow boolean scalar tensor + - name: A name for this operation + + Returns: + A Tensor of the same shape of x + """ + +def spatial_dropout(x, rate, data_format="channels_last", name=None): + """ + Spatial 2D version of Dropout. + + Parameters: + - x: A 4D tensor + - rate: Float between 0 and 1. 
Fraction of the input units to drop + - data_format: 'channels_first' or 'channels_last' + - name: A name for this operation + + Returns: + A tensor of the same shape as x + """ +``` + +## Usage Examples + +```python +import tensorflow as tf + +# Activation functions +x = tf.constant([-2.0, -1.0, 0.0, 1.0, 2.0]) +relu_out = tf.nn.relu(x) # [0.0, 0.0, 0.0, 1.0, 2.0] +sigmoid_out = tf.nn.sigmoid(x) # [0.119, 0.269, 0.5, 0.731, 0.881] +tanh_out = tf.nn.tanh(x) # [-0.964, -0.762, 0.0, 0.762, 0.964] + +logits = tf.constant([[1.0, 2.0, 3.0], [1.0, 2.0, 3.0]]) +softmax_out = tf.nn.softmax(logits) # [[0.09, 0.244, 0.665], [0.09, 0.244, 0.665]] + +# Convolution operations +input_data = tf.random.normal([1, 32, 32, 3]) # Batch, Height, Width, Channels +filters = tf.random.normal([5, 5, 3, 64]) # Height, Width, In_channels, Out_channels +conv_out = tf.nn.conv2d(input_data, filters, strides=[1, 1, 1, 1], padding='SAME') + +# Pooling operations +max_pool_out = tf.nn.max_pool2d(conv_out, ksize=2, strides=2, padding='VALID') +avg_pool_out = tf.nn.avg_pool2d(conv_out, ksize=2, strides=2, padding='VALID') + +# Loss functions +y_true = tf.constant([0, 1, 2]) +y_pred = tf.constant([[0.1, 0.8, 0.1], [0.2, 0.7, 0.1], [0.1, 0.2, 0.7]]) +loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_true, logits=y_pred) + +# Dropout +training_data = tf.random.normal([32, 128]) +dropped_out = tf.nn.dropout(training_data, rate=0.5, training=True) +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-tensorflow/docs/saved-model.md b/.tessl/tiles/tessl/pypi-tensorflow/docs/saved-model.md new file mode 100644 index 0000000..bd190df --- /dev/null +++ b/.tessl/tiles/tessl/pypi-tensorflow/docs/saved-model.md @@ -0,0 +1,340 @@ +# Model Management + +Complete model serialization, checkpointing, and deployment utilities for production and inference. These operations provide comprehensive model lifecycle management capabilities. + +## Capabilities + +### Model Saving and Loading + +Save and load complete models with all weights, architecture, and training configuration. + +```python { .api } +def save(obj, export_dir, signatures=None, options=None): + """ + Exports a tf.Module (and subclasses) obj to SavedModel format. + + Parameters: + - obj: A trackable object (e.g. tf.Module or tf.keras.Model) to export + - export_dir: A directory in which to write the SavedModel + - signatures: Optional, either a tf.function with an input signature specified or a dictionary + - options: Optional, tf.saved_model.SaveOptions object that specifies options for saving + """ + +def load(export_dir, tags=None, options=None): + """ + Load a SavedModel from export_dir. + + Parameters: + - export_dir: The SavedModel directory to load from + - tags: A tag or sequence of tags identifying the MetaGraph to load + - options: Optional, tf.saved_model.LoadOptions object that specifies options for loading + + Returns: + A trackable object with a save method + """ + +def contains_saved_model(export_dir): + """ + Checks whether the provided export directory could contain a SavedModel. + + Parameters: + - export_dir: Absolute or relative path to a directory containing the SavedModel + + Returns: + True if the export directory contains SavedModel files, False otherwise + """ +``` + +### Checkpointing + +Save and restore model weights and training state for resuming training. + +```python { .api } +class Checkpoint: + """ + Groups trackable objects, saving and restoring them. 
+ + Methods: + - save(file_prefix): Saves a training checkpoint and provides a context manager + - restore(save_path): Restore a training checkpoint + - read(save_path): Returns CheckpointReader for checkpoint inspection + """ + + def __init__(self, **kwargs): + """ + Groups trackable objects, saving and restoring them. + + Parameters: + - **kwargs: Keyword arguments are set as attributes of this object, and are saved with the checkpoint + """ + + def save(self, file_prefix, session=None): + """ + Saves a training checkpoint and provides a context manager. + + Parameters: + - file_prefix: A prefix to use for the checkpoint filenames + - session: The session to evaluate variables in. Ignored when executing eagerly + + Returns: + The full path to the checkpoint + """ + + def restore(self, save_path): + """ + Restore a training checkpoint. + + Parameters: + - save_path: The path to the checkpoint, as returned by save or tf.train.latest_checkpoint + + Returns: + A load status object, which can be used to make assertions about the status of a checkpoint restoration + """ + + def read(self, save_path): + """ + Returns a CheckpointReader for the checkpoint. + + Parameters: + - save_path: The path to the checkpoint, as returned by save or tf.train.latest_checkpoint + + Returns: + A CheckpointReader object + """ + +class CheckpointManager: + """ + Deletes old checkpoints. + + Methods: + - save(checkpoint_number): Creates a new checkpoint + """ + + def __init__(self, checkpoint, directory, max_to_keep=5, keep_checkpoint_every_n_hours=None, + checkpoint_name="ckpt", step_counter=None, checkpoint_interval=None, + init_fn=None): + """ + Deletes old checkpoints. + + Parameters: + - checkpoint: The tf.train.Checkpoint instance to save and manage checkpoints for + - directory: The path to a directory in which to write checkpoints + - max_to_keep: An integer, the number of checkpoints to keep + - keep_checkpoint_every_n_hours: Upon removal, keep checkpoints every N hours + - checkpoint_name: Custom name for the checkpoint file + - step_counter: A tf.Variable instance for checking the current step counter value + - checkpoint_interval: An integer, indicates that keep_checkpoint_every_n_hours should be based on checkpoints saved every checkpoint_interval steps + - init_fn: Callable. Function executed the first time a checkpoint is saved + """ + + def save(self, checkpoint_number=None, check_interval=True): + """ + Creates a new checkpoint and manages deletion of old checkpoints. + + Parameters: + - checkpoint_number: An optional integer, or an integer-dtype Variable or Tensor, used to number the checkpoint + - check_interval: An optional boolean. The default behaviour is that checkpoint_interval is ignored when checkpoint_number is provided + + Returns: + The path to the new checkpoint. It is also recorded in the checkpoints and latest_checkpoint properties + """ +``` + +### Checkpoint Utilities + +Utility functions for working with checkpoints. + +```python { .api } +def list_variables(checkpoint_dir): + """ + Returns list of all variables in the checkpoint. + + Parameters: + - checkpoint_dir: Directory with checkpoint file or path to checkpoint + + Returns: + List of tuples (name, shape) for all variables in the checkpoint + """ + +def load_checkpoint(checkpoint_dir): + """ + Returns CheckpointReader for checkpoint found in checkpoint_dir. 
+ + Parameters: + - checkpoint_dir: Directory with checkpoint file or path to checkpoint + + Returns: + CheckpointReader instance + """ + +def load_variable(checkpoint_dir, name): + """ + Returns the tensor value of the given variable in the checkpoint. + + Parameters: + - checkpoint_dir: Directory with checkpoint file or path to checkpoint + - name: Name of the variable to return + + Returns: + A numpy ndarray with a copy of the value of this variable + """ + +def latest_checkpoint(checkpoint_dir, latest_filename=None): + """ + Finds the filename of latest saved checkpoint file. + + Parameters: + - checkpoint_dir: Directory where the variables were saved + - latest_filename: Optional name for the protocol buffer file that contains the list of most recent checkpoint filenames + + Returns: + The full path to the latest checkpoint or None if no checkpoint was found + """ +``` + +### SavedModel Utilities + +Additional utilities for working with SavedModel format. + +```python { .api } +class SaveOptions: + """ + Options for saving to SavedModel. + + Parameters: + - namespace_whitelist: List of strings containing op namespaces to whitelist when saving a model + - save_debug_info: Boolean indicating whether debug information is saved + - function_aliases: Optional dictionary of string -> string of function aliases + - experimental_io_device: string. Applies in a distributed setting + - experimental_variable_policy: The policy to apply to variables when saving + """ + +class LoadOptions: + """ + Options for loading a SavedModel. + + Parameters: + - allow_partial_checkpoint: Boolean. Defaults to False. When enabled, allows the SavedModel checkpoint to be missing variables + - experimental_io_device: string. Loads SavedModel and variables on the specified device + - experimental_skip_checkpoint: boolean. If True, the checkpoint will not be loaded, and the SavedModel will be loaded with randomly initialized variable values + """ + +class Asset: + """ + Represents a file asset to copy into the SavedModel. 
+ + Parameters: + - path: A path, or a 0-D tf.string Tensor with path to the asset + """ +``` + +## Usage Examples + +```python +import tensorflow as tf +import os + +# Create a simple model +model = tf.keras.Sequential([ + tf.keras.layers.Dense(64, activation='relu', input_shape=(10,)), + tf.keras.layers.Dense(32, activation='relu'), + tf.keras.layers.Dense(1, activation='sigmoid') +]) + +model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) + +# Save entire model to SavedModel format +tf.saved_model.save(model, 'my_saved_model') + +# Load the saved model +loaded_model = tf.saved_model.load('my_saved_model') + +# For Keras models, use keras save/load for full functionality +model.save('my_keras_model.h5') +loaded_keras_model = tf.keras.models.load_model('my_keras_model.h5') + +# Checkpoint example +checkpoint_dir = './training_checkpoints' +checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt") + +# Create checkpoint object +checkpoint = tf.train.Checkpoint(optimizer=tf.keras.optimizers.Adam(), + model=model) + +# Save checkpoint +checkpoint.save(file_prefix=checkpoint_prefix) + +# Restore from checkpoint +checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir)) + +# Using CheckpointManager for automatic cleanup +manager = tf.train.CheckpointManager( + checkpoint, directory=checkpoint_dir, max_to_keep=3 +) + +# Save with automatic cleanup +save_path = manager.save() +print(f"Saved checkpoint for step {step}: {save_path}") + +# Training loop with checkpointing +optimizer = tf.keras.optimizers.Adam() +checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=model) +manager = tf.train.CheckpointManager(checkpoint, './checkpoints', max_to_keep=3) + +# Restore if checkpoint exists +checkpoint.restore(manager.latest_checkpoint) +if manager.latest_checkpoint: + print(f"Restored from {manager.latest_checkpoint}") +else: + print("Initializing from scratch.") + +# Training step function +@tf.function +def train_step(x, y): + with tf.GradientTape() as tape: + predictions = model(x, training=True) + loss = tf.keras.losses.binary_crossentropy(y, predictions) + + gradients = tape.gradient(loss, model.trainable_variables) + optimizer.apply_gradients(zip(gradients, model.trainable_variables)) + + return loss + +# Training loop +for epoch in range(10): + # Training code here... 
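+ # (In a real run, each iteration would fetch a batch and call train_step, as sketched in the commented lines below.)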
+ # x_batch, y_batch = get_batch() + # loss = train_step(x_batch, y_batch) + + # Save checkpoint every few epochs + if epoch % 2 == 0: + save_path = manager.save() + print(f"Saved checkpoint for epoch {epoch}: {save_path}") + +# Inspect checkpoint contents +checkpoint_path = tf.train.latest_checkpoint(checkpoint_dir) +if checkpoint_path: + variables = tf.train.list_variables(checkpoint_path) + for name, shape in variables: + print(f"Variable: {name}, Shape: {shape}") + + # Load specific variable + specific_var = tf.train.load_variable(checkpoint_path, 'model/dense/kernel/.ATTRIBUTES/VARIABLE_VALUE') + print(f"Loaded variable shape: {specific_var.shape}") + +# Check if directory contains SavedModel +if tf.saved_model.contains_saved_model('my_saved_model'): + print("Directory contains a valid SavedModel") + +# Advanced SavedModel with custom signatures +@tf.function(input_signature=[tf.TensorSpec(shape=[None, 10], dtype=tf.float32)]) +def inference_func(x): + return model(x) + +# Save with custom signature +tf.saved_model.save( + model, + 'model_with_signature', + signatures={'serving_default': inference_func} +) +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-tensorflow/tile.json b/.tessl/tiles/tessl/pypi-tensorflow/tile.json new file mode 100644 index 0000000..8720748 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-tensorflow/tile.json @@ -0,0 +1,7 @@ +{ + "name": "tessl/pypi-tensorflow", + "version": "2.20.0", + "docs": "docs/index.md", + "describes": "pkg:pypi/tensorflow@2.20.0", + "summary": "An end-to-end open source platform for machine learning" +} \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-testcontainers/docs/cache-messaging.md b/.tessl/tiles/tessl/pypi-testcontainers/docs/cache-messaging.md new file mode 100644 index 0000000..5d1cc66 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-testcontainers/docs/cache-messaging.md @@ -0,0 +1,460 @@ +# Cache and Messaging Containers + +Containers for caching systems, message queues, and pub/sub services including Redis, Kafka, RabbitMQ, NATS, and other messaging brokers with integrated client support and service-specific configurations. + +## Capabilities + +### Redis Container + +Redis in-memory data store container with authentication support, both synchronous and asynchronous client integration. + +```python { .api } +class RedisContainer: + def __init__( + self, + image: str = "redis:latest", + port: int = 6379, + password: Optional[str] = None, + **kwargs: Any + ): + """ + Initialize Redis container. + + Args: + image: Redis Docker image + port: Redis port (default 6379) + password: Redis authentication password + **kwargs: Additional container options + """ + + def get_client(self, **kwargs: Any): + """ + Get configured Redis client. + + Args: + **kwargs: Additional redis-py client arguments + + Returns: + Redis client instance + """ + +class AsyncRedisContainer(RedisContainer): + def get_async_client(self, **kwargs: Any): + """ + Get configured async Redis client. + + Args: + **kwargs: Additional redis-py async client arguments + + Returns: + Async Redis client instance + """ +``` + +### Kafka Container + +Apache Kafka distributed streaming platform container with KRaft mode support and bootstrap server configuration. + +```python { .api } +class KafkaContainer: + def __init__( + self, + image: str = "confluentinc/cp-kafka:7.6.0", + port: int = 9093, + **kwargs: Any + ): + """ + Initialize Kafka container. 
+ + Args: + image: Kafka Docker image + port: Kafka port (default 9093) + **kwargs: Additional container options + """ + + def get_bootstrap_server(self) -> str: + """ + Get Kafka bootstrap server address. + + Returns: + Bootstrap server address string (host:port) + """ + + def with_kraft(self) -> "KafkaContainer": + """ + Enable KRaft mode (Kafka without Zookeeper). + + Returns: + Self for method chaining + """ + + def with_cluster_id(self, cluster_id: str) -> "KafkaContainer": + """ + Set Kafka cluster ID for KRaft mode. + + Args: + cluster_id: Cluster identifier + + Returns: + Self for method chaining + """ +``` + +### RabbitMQ Container + +RabbitMQ message broker container with management interface and authentication configuration. + +```python { .api } +class RabbitMqContainer: + def __init__( + self, + image: str = "rabbitmq:3-management", + port: int = 5672, + username: str = "guest", + password: str = "guest", + **kwargs: Any + ): + """ + Initialize RabbitMQ container. + + Args: + image: RabbitMQ Docker image + port: AMQP port (default 5672) + username: RabbitMQ username + password: RabbitMQ password + **kwargs: Additional container options + """ + + def get_connection_url(self) -> str: + """ + Get RabbitMQ connection URL. + + Returns: + AMQP connection URL string + """ +``` + +### NATS Container + +NATS messaging system container for high-performance pub/sub and streaming communication. + +```python { .api } +class NatsContainer: + def __init__( + self, + image: str = "nats:latest", + port: int = 4222, + **kwargs: Any + ): + """ + Initialize NATS container. + + Args: + image: NATS Docker image + port: NATS port (default 4222) + **kwargs: Additional container options + """ + + def get_connection_url(self) -> str: + """ + Get NATS connection URL. + + Returns: + NATS connection URL string + """ +``` + +### MQTT Container + +MQTT message broker container for IoT and lightweight messaging scenarios. + +```python { .api } +class MqttContainer: + def __init__( + self, + image: str = "eclipse-mosquitto:latest", + port: int = 1883, + **kwargs: Any + ): + """ + Initialize MQTT broker container. + + Args: + image: MQTT broker Docker image + port: MQTT port (default 1883) + **kwargs: Additional container options + """ + + def get_connection_url(self) -> str: + """ + Get MQTT broker URL. + + Returns: + MQTT broker URL string + """ +``` + +### Memcached Container + +Memcached distributed memory caching system container for high-performance caching. + +```python { .api } +class MemcachedContainer: + def __init__( + self, + image: str = "memcached:latest", + port: int = 11211, + **kwargs: Any + ): + """ + Initialize Memcached container. + + Args: + image: Memcached Docker image + port: Memcached port (default 11211) + **kwargs: Additional container options + """ + + def get_connection_url(self) -> str: + """ + Get Memcached connection URL. 
+ + Returns: + Memcached connection URL string + """ +``` + +## Usage Examples + +### Redis Caching + +```python +from testcontainers.redis import RedisContainer +import redis + +with RedisContainer("redis:6-alpine") as redis_container: + # Get Redis client + client = redis_container.get_client() + + # Basic Redis operations + client.set("key1", "value1") + client.hset("user:1", "name", "John", "email", "john@example.com") + + # Retrieve values + value = client.get("key1") + user_data = client.hgetall("user:1") + + print(f"Cached value: {value.decode()}") + print(f"User data: {user_data}") + + # List operations + client.lpush("tasks", "task1", "task2", "task3") + tasks = client.lrange("tasks", 0, -1) + print(f"Tasks: {[task.decode() for task in tasks]}") +``` + +### Async Redis Usage + +```python +from testcontainers.redis import AsyncRedisContainer +import asyncio + +async def async_redis_example(): + with AsyncRedisContainer("redis:6") as redis_container: + # Get async Redis client + client = redis_container.get_async_client() + + # Async Redis operations + await client.set("async_key", "async_value") + value = await client.get("async_key") + + print(f"Async value: {value.decode()}") + + # Close the client + await client.close() + +# Run the async example +asyncio.run(async_redis_example()) +``` + +### Kafka Messaging + +```python +from testcontainers.kafka import KafkaContainer +from kafka import KafkaProducer, KafkaConsumer +import json + +with KafkaContainer() as kafka: + bootstrap_server = kafka.get_bootstrap_server() + + # Create producer + producer = KafkaProducer( + bootstrap_servers=[bootstrap_server], + value_serializer=lambda x: json.dumps(x).encode('utf-8') + ) + + # Send messages + for i in range(5): + message = {"id": i, "message": f"Hello Kafka {i}"} + producer.send("test-topic", message) + + producer.flush() + producer.close() + + # Create consumer + consumer = KafkaConsumer( + "test-topic", + bootstrap_servers=[bootstrap_server], + value_deserializer=lambda m: json.loads(m.decode('utf-8')) + ) + + # Consume messages + for message in consumer: + print(f"Received: {message.value}") + if message.value["id"] >= 4: # Stop after receiving all messages + break + + consumer.close() +``` + +### RabbitMQ Message Queue + +```python +from testcontainers.rabbitmq import RabbitMqContainer +import pika +import json + +with RabbitMqContainer() as rabbitmq: + connection_url = rabbitmq.get_connection_url() + + # Connect to RabbitMQ + connection = pika.BlockingConnection(pika.URLParameters(connection_url)) + channel = connection.channel() + + # Declare queue + queue_name = "task_queue" + channel.queue_declare(queue=queue_name, durable=True) + + # Publish messages + for i in range(3): + message = {"task_id": i, "data": f"Task {i} data"} + channel.basic_publish( + exchange="", + routing_key=queue_name, + body=json.dumps(message), + properties=pika.BasicProperties(delivery_mode=2) # Make message persistent + ) + print(f"Sent task {i}") + + # Consume messages + def callback(ch, method, properties, body): + message = json.loads(body) + print(f"Received task: {message}") + ch.basic_ack(delivery_tag=method.delivery_tag) + + channel.basic_consume(queue=queue_name, on_message_callback=callback) + + # Process a few messages + for _ in range(3): + channel.process_data_events(time_limit=1) + + connection.close() +``` + +### Multi-Service Messaging Setup + +```python +from testcontainers.redis import RedisContainer +from testcontainers.kafka import KafkaContainer +from testcontainers.rabbitmq import 
RabbitMqContainer +from testcontainers.core.network import Network + +# Create shared network +with Network() as network: + # Start multiple messaging services + with RedisContainer("redis:6") as redis, \ + KafkaContainer() as kafka, \ + RabbitMqContainer() as rabbitmq: + + # Connect to network + redis.with_network(network).with_network_aliases("redis") + kafka.with_network(network).with_network_aliases("kafka") + rabbitmq.with_network(network).with_network_aliases("rabbitmq") + + # Get service endpoints + redis_client = redis.get_client() + kafka_bootstrap = kafka.get_bootstrap_server() + rabbitmq_url = rabbitmq.get_connection_url() + + # Use services together in application architecture + print(f"Redis available: {redis_client.ping()}") + print(f"Kafka bootstrap: {kafka_bootstrap}") + print(f"RabbitMQ URL: {rabbitmq_url}") +``` + +### NATS Pub/Sub + +```python +from testcontainers.nats import NatsContainer +import asyncio +import nats + +async def nats_example(): + with NatsContainer() as nats_container: + connection_url = nats_container.get_connection_url() + + # Connect to NATS + nc = await nats.connect(connection_url) + + # Subscribe to subject + async def message_handler(msg): + subject = msg.subject + data = msg.data.decode() + print(f"Received message on {subject}: {data}") + + await nc.subscribe("updates", cb=message_handler) + + # Publish messages + for i in range(3): + await nc.publish("updates", f"Update {i}".encode()) + + # Allow time for message processing + await asyncio.sleep(1) + + await nc.close() + +# Run the async example +asyncio.run(nats_example()) +``` + +## Configuration Examples + +### Redis with Custom Configuration + +```python +from testcontainers.redis import RedisContainer + +# Redis with password authentication +redis = RedisContainer("redis:6") \ + .with_env("REDIS_PASSWORD", "mypassword") \ + .with_command("redis-server --requirepass mypassword") + +with redis: + client = redis.get_client(password="mypassword") + client.set("protected_key", "protected_value") +``` + +### Kafka with KRaft Mode + +```python +from testcontainers.kafka import KafkaContainer + +# Kafka without Zookeeper using KRaft +kafka = KafkaContainer("confluentinc/cp-kafka:7.6.0") \ + .with_kraft() \ + .with_cluster_id("test-cluster-id") + +with kafka: + bootstrap_server = kafka.get_bootstrap_server() + print(f"KRaft Kafka available at: {bootstrap_server}") +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-testcontainers/docs/cloud-services.md b/.tessl/tiles/tessl/pypi-testcontainers/docs/cloud-services.md new file mode 100644 index 0000000..b30eb58 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-testcontainers/docs/cloud-services.md @@ -0,0 +1,488 @@ +# Cloud Services Integration + +Containers for cloud service emulation and integration including LocalStack for AWS services, Azure emulators, and Google Cloud Platform services for local development and testing without cloud dependencies. + +## Capabilities + +### LocalStack Container + +LocalStack provides a local AWS cloud service emulator supporting S3, DynamoDB, Lambda, SQS, SNS, and many other AWS services for local development and testing. + +```python { .api } +class LocalStackContainer: + def __init__( + self, + image: str = "localstack/localstack:2.0.1", + edge_port: int = 4566, + region_name: Optional[str] = None, + **kwargs: Any + ): + """ + Initialize LocalStack container. 
+ + Args: + image: LocalStack Docker image + edge_port: LocalStack edge port (default 4566) + region_name: AWS region name (default us-east-1) + **kwargs: Additional container options + """ + + def with_services(self, *services: str) -> "LocalStackContainer": + """ + Restrict LocalStack to specific AWS services. + + Args: + *services: AWS service names (s3, dynamodb, lambda, sqs, etc.) + + Returns: + Self for method chaining + """ + + def get_url(self) -> str: + """ + Get LocalStack endpoint URL. + + Returns: + LocalStack endpoint URL string + """ + + def get_client(self, name: str, **kwargs: Any): + """ + Get boto3 client configured for LocalStack. + + Args: + name: AWS service name (s3, dynamodb, etc.) + **kwargs: Additional boto3 client arguments + + Returns: + Configured boto3 client instance + """ +``` + +### Azure Services Container + +Azure service emulators for local development including Azurite for Azure Storage and Cosmos DB emulator. + +```python { .api } +class AzuriteContainer: + def __init__( + self, + image: str = "mcr.microsoft.com/azure-storage/azurite:latest", + **kwargs: Any + ): + """ + Initialize Azurite Azure Storage emulator container. + + Args: + image: Azurite Docker image + **kwargs: Additional container options + """ + + def get_connection_string(self) -> str: + """ + Get Azure Storage connection string. + + Returns: + Azure Storage connection string + """ + +class CosmosDbContainer: + def __init__( + self, + image: str = "mcr.microsoft.com/cosmosdb/linux/azure-cosmos-emulator:latest", + **kwargs: Any + ): + """ + Initialize Cosmos DB emulator container. + + Args: + image: Cosmos DB emulator Docker image + **kwargs: Additional container options + """ + + def get_connection_string(self) -> str: + """ + Get Cosmos DB connection string. + + Returns: + Cosmos DB connection string + """ +``` + +### Google Cloud Services Container + +Google Cloud Platform service emulators for local development and testing. + +```python { .api } +class GoogleCloudContainer: + def __init__( + self, + image: str = "gcr.io/google.com/cloudsdktool/cloud-sdk:latest", + **kwargs: Any + ): + """ + Initialize Google Cloud services container. + + Args: + image: Google Cloud SDK Docker image + **kwargs: Additional container options + """ + + def get_pubsub_emulator_host(self) -> str: + """ + Get Pub/Sub emulator host. + + Returns: + Pub/Sub emulator host string + """ + + def get_datastore_emulator_host(self) -> str: + """ + Get Datastore emulator host. + + Returns: + Datastore emulator host string + """ +``` + +## Usage Examples + +### LocalStack AWS Services + +```python +from testcontainers.localstack import LocalStackContainer +import boto3 + +# Start LocalStack with specific services +with LocalStackContainer() as localstack: + localstack.with_services("s3", "dynamodb", "sqs", "sns") + + # Get AWS clients configured for LocalStack + s3_client = localstack.get_client("s3") + dynamodb_client = localstack.get_client("dynamodb") + sqs_client = localstack.get_client("sqs") + + # S3 operations + bucket_name = "test-bucket" + s3_client.create_bucket(Bucket=bucket_name) + + # Upload file to S3 + s3_client.put_object( + Bucket=bucket_name, + Key="test-file.txt", + Body=b"Hello, LocalStack S3!" 
+ ) + + # List S3 objects + response = s3_client.list_objects_v2(Bucket=bucket_name) + print(f"S3 objects: {[obj['Key'] for obj in response.get('Contents', [])]}") + + # DynamoDB operations + table_name = "test-table" + dynamodb_client.create_table( + TableName=table_name, + KeySchema=[ + {"AttributeName": "id", "KeyType": "HASH"} + ], + AttributeDefinitions=[ + {"AttributeName": "id", "AttributeType": "S"} + ], + BillingMode="PAY_PER_REQUEST" + ) + + # Put item in DynamoDB + dynamodb_client.put_item( + TableName=table_name, + Item={ + "id": {"S": "test-id"}, + "name": {"S": "Test Item"}, + "value": {"N": "42"} + } + ) + + # Get item from DynamoDB + response = dynamodb_client.get_item( + TableName=table_name, + Key={"id": {"S": "test-id"}} + ) + print(f"DynamoDB item: {response['Item']}") + + # SQS operations + queue_name = "test-queue" + queue_url = sqs_client.create_queue(QueueName=queue_name)["QueueUrl"] + + # Send message to SQS + sqs_client.send_message( + QueueUrl=queue_url, + MessageBody="Hello, LocalStack SQS!" + ) + + # Receive message from SQS + messages = sqs_client.receive_message(QueueUrl=queue_url) + if "Messages" in messages: + for message in messages["Messages"]: + print(f"SQS message: {message['Body']}") +``` + +### Azure Storage with Azurite + +```python +from testcontainers.azurite import AzuriteContainer +from azure.storage.blob import BlobServiceClient + +with AzuriteContainer() as azurite: + # Get connection string + connection_string = azurite.get_connection_string() + + # Create blob service client + blob_service = BlobServiceClient.from_connection_string(connection_string) + + # Create container + container_name = "test-container" + blob_service.create_container(container_name) + + # Upload blob + blob_name = "test-blob.txt" + blob_data = b"Hello, Azurite!" 
+ blob_client = blob_service.get_blob_client( + container=container_name, + blob=blob_name + ) + blob_client.upload_blob(blob_data, overwrite=True) + + # Download blob + download_stream = blob_client.download_blob() + downloaded_data = download_stream.readall() + print(f"Downloaded: {downloaded_data.decode()}") + + # List blobs in container + container_client = blob_service.get_container_client(container_name) + blobs = list(container_client.list_blobs()) + print(f"Blobs: {[blob.name for blob in blobs]}") +``` + +### Google Cloud Pub/Sub Emulator + +```python +from testcontainers.google import GoogleCloudContainer +from google.cloud import pubsub_v1 +import os + +with GoogleCloudContainer() as gcp: + # Set environment variable for emulator + pubsub_host = gcp.get_pubsub_emulator_host() + os.environ["PUBSUB_EMULATOR_HOST"] = pubsub_host + + # Create publisher and subscriber clients + publisher = pubsub_v1.PublisherClient() + subscriber = pubsub_v1.SubscriberClient() + + # Create topic + project_id = "test-project" + topic_name = "test-topic" + topic_path = publisher.topic_path(project_id, topic_name) + publisher.create_topic(request={"name": topic_path}) + + # Create subscription + subscription_name = "test-subscription" + subscription_path = subscriber.subscription_path(project_id, subscription_name) + subscriber.create_subscription( + request={"name": subscription_path, "topic": topic_path} + ) + + # Publish messages + for i in range(5): + message = f"Message {i}" + future = publisher.publish(topic_path, message.encode()) + print(f"Published message ID: {future.result()}") + + # Pull messages + def callback(message): + print(f"Received: {message.data.decode()}") + message.ack() + + streaming_pull_future = subscriber.subscribe(subscription_path, callback=callback) + + # Wait for messages (in real scenario, you'd have a proper event loop) + import time + time.sleep(2) + streaming_pull_future.cancel() +``` + +### Multi-Cloud Development Environment + +```python +from testcontainers.localstack import LocalStackContainer +from testcontainers.azurite import AzuriteContainer +from testcontainers.google import GoogleCloudContainer +from testcontainers.core.network import Network + +# Create shared network for cloud services +with Network() as network: + # Start multiple cloud service emulators + with LocalStackContainer() as aws, \ + AzuriteContainer() as azure, \ + GoogleCloudContainer() as gcp: + + # Configure AWS services + aws.with_services("s3", "dynamodb", "lambda") + aws.with_network(network).with_network_aliases("aws") + + # Configure Azure services + azure.with_network(network).with_network_aliases("azure") + + # Configure GCP services + gcp.with_network(network).with_network_aliases("gcp") + + # Get service endpoints + aws_url = aws.get_url() + azure_conn = azure.get_connection_string() + pubsub_host = gcp.get_pubsub_emulator_host() + + print(f"AWS LocalStack: {aws_url}") + print(f"Azure Storage: Available") + print(f"GCP Pub/Sub: {pubsub_host}") + + # Use services together in multi-cloud application + # AWS for compute and storage + # Azure for blob storage + # GCP for messaging +``` + +### Lambda Function Testing with LocalStack + +```python +from testcontainers.localstack import LocalStackContainer +import boto3 +import json +import zipfile +import io + +def create_lambda_zip(): + """Create a simple Lambda function ZIP.""" + lambda_code = ''' +def lambda_handler(event, context): + return { + 'statusCode': 200, + 'body': json.dumps(f'Hello from Lambda! 
Event: {event}') + } +''' + + zip_buffer = io.BytesIO() + with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file: + zip_file.writestr('lambda_function.py', lambda_code) + + return zip_buffer.getvalue() + +with LocalStackContainer() as localstack: + localstack.with_services("lambda", "iam") + + # Get Lambda client + lambda_client = localstack.get_client("lambda") + iam_client = localstack.get_client("iam") + + # Create IAM role for Lambda + role_name = "lambda-role" + trust_policy = { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": {"Service": "lambda.amazonaws.com"}, + "Action": "sts:AssumeRole" + } + ] + } + + role_response = iam_client.create_role( + RoleName=role_name, + AssumeRolePolicyDocument=json.dumps(trust_policy) + ) + role_arn = role_response["Role"]["Arn"] + + # Create Lambda function + function_name = "test-function" + lambda_client.create_function( + FunctionName=function_name, + Runtime="python3.9", + Role=role_arn, + Handler="lambda_function.lambda_handler", + Code={"ZipFile": create_lambda_zip()}, + Description="Test Lambda function" + ) + + # Invoke Lambda function + response = lambda_client.invoke( + FunctionName=function_name, + InvocationType="RequestResponse", + Payload=json.dumps({"test": "data"}) + ) + + result = json.loads(response["Payload"].read()) + print(f"Lambda response: {result}") +``` + +### Cloud Storage Integration Testing + +```python +from testcontainers.localstack import LocalStackContainer +from testcontainers.azurite import AzuriteContainer +import boto3 +from azure.storage.blob import BlobServiceClient +import json + +class CloudStorageTest: + def __init__(self): + self.test_data = {"message": "Hello, Cloud Storage!", "timestamp": "2023-01-01T12:00:00Z"} + + def test_aws_s3(self, localstack): + """Test AWS S3 storage.""" + s3_client = localstack.get_client("s3") + + # Create bucket and upload data + bucket = "test-bucket" + s3_client.create_bucket(Bucket=bucket) + s3_client.put_object( + Bucket=bucket, + Key="test-data.json", + Body=json.dumps(self.test_data), + ContentType="application/json" + ) + + # Download and verify + response = s3_client.get_object(Bucket=bucket, Key="test-data.json") + downloaded_data = json.loads(response["Body"].read()) + assert downloaded_data == self.test_data + print("AWS S3 test passed") + + def test_azure_blob(self, azurite): + """Test Azure Blob storage.""" + blob_service = BlobServiceClient.from_connection_string( + azurite.get_connection_string() + ) + + # Create container and upload data + container = "test-container" + blob_service.create_container(container) + blob_client = blob_service.get_blob_client(container, "test-data.json") + blob_client.upload_blob( + json.dumps(self.test_data), + blob_type="BlockBlob", + overwrite=True + ) + + # Download and verify + downloaded_data = json.loads(blob_client.download_blob().readall()) + assert downloaded_data == self.test_data + print("Azure Blob test passed") + +# Run tests with both cloud providers +with LocalStackContainer() as aws, AzuriteContainer() as azure: + aws.with_services("s3") + + test_suite = CloudStorageTest() + test_suite.test_aws_s3(aws) + test_suite.test_azure_blob(azure) + + print("All cloud storage tests passed!") +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-testcontainers/docs/compose.md b/.tessl/tiles/tessl/pypi-testcontainers/docs/compose.md new file mode 100644 index 0000000..8f10153 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-testcontainers/docs/compose.md @@ -0,0 
+1,432 @@ +# Docker Compose Orchestration + +Complete Docker Compose integration for managing multi-container environments, service discovery, and complex application stacks during testing. Enables full orchestration of interconnected services with configuration management and lifecycle control. + +## Capabilities + +### Compose Environment Management + +Manage entire Docker Compose environments with automatic service startup, configuration loading, and coordinated shutdown. + +```python { .api } +@dataclass +class DockerCompose: + context: Union[str, PathLike[str]] + compose_file_name: Optional[Union[str, list[str]]] = None + pull: bool = False + build: bool = False + wait: bool = True + keep_volumes: bool = False + env_file: Optional[str] = None + services: Optional[list[str]] = None + docker_command_path: Optional[str] = None + profiles: Optional[list[str]] = None + """ + Initialize Docker Compose environment. + + Args: + context: Path to compose context directory + compose_file_name: Compose file name (default: docker-compose.yml) + pull: Pull images before starting + build: Build images before starting + wait: Wait for services to be ready + keep_volumes: Preserve volumes on shutdown + env_file: Environment file path + services: Specific services to run + docker_command_path: Custom docker-compose command path + profiles: Compose profiles to activate + **kwargs: Additional compose options + """ + + def start(self) -> "DockerCompose": + """ + Start the compose environment. + + Returns: + Self for method chaining + """ + + def stop(self, down: bool = True) -> None: + """ + Stop the compose environment. + + Args: + down: Use 'docker-compose down' instead of 'stop' + """ + + def __enter__(self) -> "DockerCompose": + """Context manager entry - starts compose environment.""" + + def __exit__(self, exc_type, exc_val, exc_tb) -> None: + """Context manager exit - stops compose environment.""" +``` + +### Service Discovery and Access + +Access individual services within the compose environment, retrieve connection information, and interact with running containers. + +```python { .api } +def get_container(self, service_name: Optional[str] = None, include_all: bool = False) -> ComposeContainer: + """ + Get container for specific service. + + Args: + service_name: Service name (first service if None) + include_all: Include stopped containers + + Returns: + ComposeContainer instance + """ + +def get_containers(self, include_all: bool = False) -> list[ComposeContainer]: + """ + Get all containers in the compose environment. + + Args: + include_all: Include stopped containers + + Returns: + List of ComposeContainer instances + """ + +def get_service_host(self, service_name: Optional[str] = None, port: Optional[int] = None) -> str: + """ + Get host address for service. + + Args: + service_name: Service name + port: Service port + + Returns: + Host address string + """ + +def get_service_port(self, service_name: Optional[str] = None, port: Optional[int] = None) -> int: + """ + Get mapped port for service. + + Args: + service_name: Service name + port: Internal service port + + Returns: + Mapped host port number + """ + +def get_service_host_and_port(self, service_name: Optional[str] = None, port: Optional[int] = None) -> tuple[str, int]: + """ + Get host and port for service. 
+ + Args: + service_name: Service name + port: Internal service port + + Returns: + Tuple of (host, port) + """ +``` + +### Container Operations + +Execute commands in running services, retrieve logs, and interact with the compose environment. + +```python { .api } +def exec_in_container(self, command: str, service_name: Optional[str] = None) -> str: + """ + Execute command in service container. + + Args: + command: Command to execute + service_name: Target service name + + Returns: + Command output string + """ + +def get_logs(self, *services: str) -> str: + """ + Get logs from services. + + Args: + *services: Service names (all services if none specified) + + Returns: + Combined log output string + """ + +def get_config( + self, + path_resolution: bool = True, + normalize: bool = True, + interpolate: bool = True +) -> dict: + """ + Get compose configuration. + + Args: + path_resolution: Resolve file paths + normalize: Normalize configuration format + interpolate: Interpolate environment variables + + Returns: + Compose configuration dictionary + """ +``` + +### Service Health Checking + +Wait for services to become available and ready for connections. + +```python { .api } +def wait_for(self, url: str) -> None: + """ + Wait for URL to become available. + + Args: + url: URL to check for availability + """ +``` + +### Container Information + +Access detailed information about individual containers within the compose environment. + +```python { .api } +class ComposeContainer: + ID: str # Container ID + Name: str # Container name + Command: str # Container command + Project: str # Compose project name + Service: str # Service name + State: str # Container state + Health: str # Health status + ExitCode: int # Exit code + Publishers: list[PublishedPortModel] # Published ports + + def get_publisher( + self, + by_port: Optional[int] = None, + by_host: Optional[str] = None, + prefer_ip_version: str = "IPv4" + ) -> PublishedPortModel: + """ + Get port publisher information. + + Args: + by_port: Filter by port number + by_host: Filter by host address + prefer_ip_version: Preferred IP version ("IPv4" or "IPv6") + + Returns: + PublishedPortModel instance + """ + +class PublishedPortModel: + URL: str # Published URL + TargetPort: int # Target container port + PublishedPort: int # Published host port + Protocol: str # Protocol (tcp/udp) + + def normalize(self) -> "PublishedPortModel": + """ + Normalize for Windows compatibility. 
+ + Returns: + Normalized PublishedPortModel + """ +``` + +## Usage Examples + +### Basic Compose Usage + +```python +from testcontainers.compose import DockerCompose +import requests + +# docker-compose.yml in current directory with web and db services +with DockerCompose(".") as compose: + # Get service endpoints + web_host = compose.get_service_host("web", 80) + web_port = compose.get_service_port("web", 80) + + # Make request to web service + response = requests.get(f"http://{web_host}:{web_port}/health") + assert response.status_code == 200 + + # Get database connection info + db_host = compose.get_service_host("db", 5432) + db_port = compose.get_service_port("db", 5432) + print(f"Database available at {db_host}:{db_port}") +``` + +### Custom Compose File + +```python +from testcontainers.compose import DockerCompose + +# Use specific compose file and environment +compose = DockerCompose( + context="./docker", + compose_file_name="docker-compose.test.yml", + pull=True, # Pull latest images + build=True, # Build custom images + env_file="test.env" +) + +with compose: + # Execute command in service + result = compose.exec_in_container("ls -la", service_name="app") + print(f"Container contents: {result}") + + # Get logs from specific services + logs = compose.get_logs("app", "worker") + print(f"Service logs: {logs}") +``` + +### Service-Specific Operations + +```python +from testcontainers.compose import DockerCompose + +with DockerCompose(".", compose_file_name="microservices.yml") as compose: + # Get all running containers + containers = compose.get_containers() + + for container in containers: + print(f"Service: {container.Service}") + print(f"State: {container.State}") + print(f"Health: {container.Health}") + + # Get port information + for publisher in container.Publishers: + print(f"Port {publisher.TargetPort} -> {publisher.PublishedPort}") + + # Access specific service container + api_container = compose.get_container("api") + print(f"API container ID: {api_container.ID}") +``` + +### Profile-Based Deployment + +```python +from testcontainers.compose import DockerCompose + +# Use compose profiles for different environments +test_compose = DockerCompose( + context=".", + profiles=["test", "monitoring"], + services=["app", "db", "redis"] # Only start specific services +) + +with test_compose: + # Only services in 'test' and 'monitoring' profiles are started + app_url = f"http://{test_compose.get_service_host('app', 8080)}:{test_compose.get_service_port('app', 8080)}" + print(f"Test app available at: {app_url}") +``` + +### Integration Testing Setup + +```python +from testcontainers.compose import DockerCompose +import pytest +import requests + +@pytest.fixture(scope="session") +def app_stack(): + """Pytest fixture for full application stack.""" + with DockerCompose(".", compose_file_name="test-stack.yml") as compose: + # Wait for services to be ready + compose.wait_for(f"http://{compose.get_service_host('app', 8080)}:{compose.get_service_port('app', 8080)}/health") + + yield compose + +def test_api_endpoints(app_stack): + """Test API endpoints with full stack.""" + compose = app_stack + + # Get API endpoint + api_host = compose.get_service_host("api", 3000) + api_port = compose.get_service_port("api", 3000) + base_url = f"http://{api_host}:{api_port}" + + # Test endpoints + response = requests.get(f"{base_url}/users") + assert response.status_code == 200 + + response = requests.post(f"{base_url}/users", json={"name": "Test User"}) + assert response.status_code == 201 + +def 
test_database_integration(app_stack): + """Test database operations.""" + compose = app_stack + + # Execute database command + result = compose.exec_in_container("psql -U postgres -c 'SELECT version();'", "db") + assert "PostgreSQL" in result +``` + +### Complex Multi-Service Architecture + +```python +from testcontainers.compose import DockerCompose +import time + +# docker-compose.yml with web, api, worker, db, redis, elasticsearch +with DockerCompose(".", compose_file_name="full-stack.yml") as compose: + # Get all service endpoints + services = { + "web": compose.get_service_host_and_port("web", 80), + "api": compose.get_service_host_and_port("api", 3000), + "db": compose.get_service_host_and_port("db", 5432), + "redis": compose.get_service_host_and_port("redis", 6379), + "elasticsearch": compose.get_service_host_and_port("elasticsearch", 9200) + } + + print("Service endpoints:") + for service, (host, port) in services.items(): + print(f" {service}: {host}:{port}") + + # Wait for all services to be healthy + for container in compose.get_containers(): + while container.Health not in ["healthy", ""]: + print(f"Waiting for {container.Service} to be healthy...") + time.sleep(2) + # Refresh container info + container = compose.get_container(container.Service) + + print("All services are ready!") + + # Run integration tests + web_host, web_port = services["web"] + response = requests.get(f"http://{web_host}:{web_port}") + print(f"Web response status: {response.status_code}") +``` + +### Environment Configuration + +```python +from testcontainers.compose import DockerCompose +import os + +# Set environment variables for compose +os.environ["DATABASE_URL"] = "postgres://test:test@db:5432/testdb" +os.environ["REDIS_URL"] = "redis://redis:6379" +os.environ["DEBUG"] = "true" + +# Compose with environment file and variable interpolation +compose = DockerCompose( + context="./infrastructure", + env_file="test.env", + keep_volumes=False # Clean up volumes after testing +) + +with compose: + # Environment variables are available in compose services + config = compose.get_config() + print("Compose configuration:", config) + + # Services use interpolated environment variables + app_logs = compose.get_logs("app") + print("Application logs:", app_logs) +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-testcontainers/docs/core-containers.md b/.tessl/tiles/tessl/pypi-testcontainers/docs/core-containers.md new file mode 100644 index 0000000..1849956 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-testcontainers/docs/core-containers.md @@ -0,0 +1,393 @@ +# Core Container Management + +The fundamental Docker container lifecycle management capabilities that form the foundation of all testcontainers functionality. The DockerContainer class provides comprehensive configuration options, networking, volume mounting, and environment setup through a fluent API. + +## Capabilities + +### Basic Container Operations + +Core container lifecycle management including creation, startup, shutdown, and resource cleanup with automatic or manual control. 
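+
+As a quick illustration of that manual lifecycle (a minimal sketch using only the methods documented below; most tests will prefer the context-manager form shown in the usage examples):
+
+```python
+from testcontainers.core.container import DockerContainer
+
+# Configure before start(); with_* methods return self for chaining
+container = DockerContainer("nginx:alpine").with_exposed_ports(80)
+container.start()
+try:
+    host = container.get_container_host_ip()
+    port = container.get_exposed_port(80)
+    print(f"nginx reachable at http://{host}:{port}")
+finally:
+    # stop() removes the container and, by default, its volumes
+    container.stop()
+```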
+ +```python { .api } +class DockerContainer: + def __init__( + self, + image: str, + docker_client_kw: Optional[dict[str, Any]] = None, + command: Optional[str] = None, + env: Optional[dict[str, str]] = None, + name: Optional[str] = None, + ports: Optional[list[int]] = None, + volumes: Optional[list[tuple[str, str, str]]] = None, + network: Optional[Network] = None, + network_aliases: Optional[list[str]] = None, + **kwargs: Any + ): + """ + Initialize a Docker container. + + Args: + image: Docker image name and tag + docker_client_kw: Docker client configuration + command: Container command to execute + env: Environment variables dictionary + name: Container name + ports: Ports to expose + volumes: Volume mounts as (host_path, container_path, mode) tuples + network: Network to connect to + network_aliases: Network aliases for the container + **kwargs: Additional Docker container options + """ + + def start(self) -> "DockerContainer": + """ + Start the container. + + Returns: + Self for method chaining + """ + + def stop(self, force: bool = True, delete_volume: bool = True) -> None: + """ + Stop and remove the container. + + Args: + force: Force container removal + delete_volume: Delete associated volumes + """ + + def __enter__(self) -> "DockerContainer": + """Context manager entry - starts container.""" + + def __exit__(self, exc_type, exc_val, exc_tb) -> None: + """Context manager exit - stops container.""" +``` + +### Container Configuration + +Fluent API for configuring container properties including environment variables, ports, volumes, networking, and Docker-specific options. + +```python { .api } +def with_env(self, key: str, value: str) -> "DockerContainer": + """ + Add environment variable. + + Args: + key: Environment variable name + value: Environment variable value + + Returns: + Self for method chaining + """ + +def with_envs(self, **variables: str) -> "DockerContainer": + """ + Add multiple environment variables. + + Args: + **variables: Environment variables as keyword arguments + + Returns: + Self for method chaining + """ + +def with_env_file(self, env_file: Union[str, PathLike]) -> "DockerContainer": + """ + Load environment variables from file. + + Args: + env_file: Path to environment file + + Returns: + Self for method chaining + """ + +def with_command(self, command: Union[str, list[str]]) -> "DockerContainer": + """ + Set container command. + + Args: + command: Command to execute in container + + Returns: + Self for method chaining + """ + +def with_name(self, name: str) -> "DockerContainer": + """ + Set container name. + + Args: + name: Container name + + Returns: + Self for method chaining + """ + +def with_kwargs(self, **kwargs: Any) -> "DockerContainer": + """ + Add Docker container creation arguments. + + Args: + **kwargs: Docker client container creation arguments + + Returns: + Self for method chaining + """ + +def with_user(self, user: str) -> "DockerContainer": + """ + Set container user. + + Args: + user: User to run container as (user or user:group) + + Returns: + Self for method chaining + """ + +def with_working_directory(self, working_directory: str) -> "DockerContainer": + """ + Set container working directory. + + Args: + working_directory: Working directory path + + Returns: + Self for method chaining + """ +``` + +### Port Management + +Configure port exposure and binding for container network access, supporting both automatic port assignment and explicit host port binding. 
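+
+The difference between exposing a port (random host mapping, looked up after start) and binding it to a fixed host port can be sketched as follows; the host port 8080 is only an illustrative choice:
+
+```python
+from testcontainers.core.container import DockerContainer
+
+# Exposed port: Docker assigns a free host port, query it after start
+auto = DockerContainer("nginx:alpine").with_exposed_ports(80)
+
+# Bound port: container port 80 is pinned to host port 8080 (illustrative value)
+fixed = DockerContainer("nginx:alpine").with_bind_ports(80, 8080)
+
+with auto, fixed:
+    print("exposed ->", auto.get_container_host_ip(), auto.get_exposed_port(80))
+    print("bound   ->", fixed.get_container_host_ip(), fixed.get_exposed_port(80))  # expected "8080"
+```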
+ +```python { .api } +def with_exposed_ports(self, *ports: int) -> "DockerContainer": + """ + Expose ports without binding to host ports. + + Args: + *ports: Port numbers to expose + + Returns: + Self for method chaining + """ + +def with_bind_ports(self, container: int, host: Optional[int] = None) -> "DockerContainer": + """ + Bind container port to specific host port. + + Args: + container: Container port number + host: Host port number (random if None) + + Returns: + Self for method chaining + """ + +def get_exposed_port(self, port: int) -> str: + """ + Get the host port mapped to container port. + + Args: + port: Container port number + + Returns: + Host port number as string + + Raises: + NoSuchPortExposed: If port not exposed + """ + +def get_container_host_ip(self) -> str: + """ + Get the IP address to connect to the container. + + Returns: + IP address string + """ +``` + +### Volume Management + +Mount host directories and files into containers with configurable access modes and path mapping. + +```python { .api } +def with_volume_mapping(self, host: str, container: str, mode: str = "ro") -> "DockerContainer": + """ + Mount host path into container. + + Args: + host: Host filesystem path + container: Container filesystem path + mode: Mount mode ('ro', 'rw', 'z', 'Z') + + Returns: + Self for method chaining + """ +``` + +### Network Configuration + +Connect containers to Docker networks with custom aliases and network-specific configuration. + +```python { .api } +def with_network(self, network: Network) -> "DockerContainer": + """ + Connect container to network. + + Args: + network: Network instance to connect to + + Returns: + Self for method chaining + """ + +def with_network_aliases(self, *aliases: str) -> "DockerContainer": + """ + Set network aliases for container. + + Args: + *aliases: Network alias names + + Returns: + Self for method chaining + """ +``` + +### Container Inspection and Control + +Access container runtime information, execute commands, and retrieve logs for debugging and integration. + +```python { .api } +def get_wrapped_container(self) -> "Container": + """ + Get the underlying Docker container object. + + Returns: + Docker container instance + """ + +def get_docker_client(self) -> DockerClient: + """ + Get the Docker client instance. + + Returns: + Docker client + """ + +def get_logs(self) -> tuple[bytes, bytes]: + """ + Get container logs. + + Returns: + Tuple of (stdout, stderr) as bytes + """ + +def exec(self, command: Union[str, list[str]]) -> ExecResult: + """ + Execute command in running container. + + Args: + command: Command to execute + + Returns: + Execution result with exit_code and output + """ +``` + +### Platform Compatibility + +Handle cross-platform differences and architecture emulation for consistent behavior across development environments. + +```python { .api } +def maybe_emulate_amd64(self) -> "DockerContainer": + """ + Enable AMD64 emulation on ARM platforms. 
+ + Returns: + Self for method chaining + """ +``` + +## Usage Examples + +### Basic Container Usage + +```python +from testcontainers.core.container import DockerContainer + +# Simple container with automatic cleanup +with DockerContainer("nginx:alpine") as container: + container.with_exposed_ports(80) + host_port = container.get_exposed_port(80) + host_ip = container.get_container_host_ip() + print(f"Nginx available at http://{host_ip}:{host_port}") +``` + +### Advanced Configuration + +```python +from testcontainers.core.container import DockerContainer +from testcontainers.core.network import Network + +# Create custom network +with Network() as network: + # Configure container with multiple options + container = DockerContainer("postgres:13") \ + .with_env("POSTGRES_DB", "testdb") \ + .with_env("POSTGRES_USER", "testuser") \ + .with_env("POSTGRES_PASSWORD", "testpass") \ + .with_exposed_ports(5432) \ + .with_volume_mapping("./data", "/var/lib/postgresql/data", "rw") \ + .with_network(network) \ + .with_network_aliases("database", "db") \ + .with_name("test-postgres") + + with container: + # Container is now running with full configuration + connection_host = container.get_container_host_ip() + connection_port = container.get_exposed_port(5432) +``` + +### Command Execution + +```python +with DockerContainer("ubuntu:20.04") as container: + container.with_command("sleep 30") + + # Execute commands in running container + result = container.exec("ls -la /") + print(f"Exit code: {result.exit_code}") + print(f"Output: {result.output.decode()}") + + # Get container logs + stdout, stderr = container.get_logs() + print(f"Container output: {stdout.decode()}") +``` + +## Types + +```python { .api } +class Mount(TypedDict): + bind: str # Container path + mode: str # Mount mode ('ro', 'rw', etc.) + +class Network: + def __init__( + self, + docker_client_kw: Optional[dict] = None, + docker_network_kw: Optional[dict] = None + ): ... + + def create(self) -> "Network": ... + def remove(self) -> None: ... + def connect(self, container_id: str, network_aliases: Optional[list[str]] = None) -> None: ... + + @property + def name(self) -> str: ... + + @property + def id(self) -> str: ... +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-testcontainers/docs/database-containers.md b/.tessl/tiles/tessl/pypi-testcontainers/docs/database-containers.md new file mode 100644 index 0000000..9da716d --- /dev/null +++ b/.tessl/tiles/tessl/pypi-testcontainers/docs/database-containers.md @@ -0,0 +1,514 @@ +# Database Containers + +Pre-configured containers for popular databases including relational databases (PostgreSQL, MySQL), NoSQL databases (MongoDB, Cassandra), and specialized data stores. Each container provides service-specific configuration options, connection utilities, and client integration. + +## Capabilities + +### PostgreSQL Container + +PostgreSQL relational database container with automatic database, user, and password configuration, supporting multiple PostgreSQL versions and client drivers. + +```python { .api } +class PostgresContainer: + def __init__( + self, + image: str = "postgres:latest", + port: int = 5432, + username: Optional[str] = None, + password: Optional[str] = None, + dbname: Optional[str] = None, + driver: str = "psycopg2", + **kwargs: Any + ): + """ + Initialize PostgreSQL container. 
+ + Args: + image: PostgreSQL Docker image + port: PostgreSQL port (default 5432) + username: Database username (auto-generated if None) + password: Database password (auto-generated if None) + dbname: Database name (auto-generated if None) + driver: Database driver for connection URL + **kwargs: Additional container options + """ + + def get_connection_url(self, host: Optional[str] = None, driver: Optional[str] = None) -> str: + """ + Get PostgreSQL connection URL. + + Args: + host: Override host (uses container host if None) + driver: Override driver (uses instance driver if None) + + Returns: + PostgreSQL connection URL string + """ +``` + +### MySQL Container + +MySQL relational database container with configurable authentication, database initialization, and support for different MySQL variants. + +```python { .api } +class MySqlContainer: + def __init__( + self, + image: str = "mysql:latest", + dialect: Optional[str] = None, + username: Optional[str] = None, + root_password: Optional[str] = None, + password: Optional[str] = None, + dbname: Optional[str] = None, + port: int = 3306, + seed: Optional[str] = None, + **kwargs: Any + ): + """ + Initialize MySQL container. + + Args: + image: MySQL Docker image + dialect: SQL dialect (mysql, mysql+pymysql) + username: Database username + root_password: Root user password + password: User password + dbname: Database name + port: MySQL port (default 3306) + seed: SQL file path for database initialization + **kwargs: Additional container options + """ + + def get_connection_url(self) -> str: + """ + Get MySQL connection URL. + + Returns: + MySQL connection URL string + """ +``` + +### MongoDB Container + +MongoDB NoSQL document database container with authentication support and direct client integration. + +```python { .api } +class MongoDbContainer: + def __init__( + self, + image: str = "mongo:latest", + port: int = 27017, + username: Optional[str] = None, + password: Optional[str] = None, + dbname: Optional[str] = None, + **kwargs: Any + ): + """ + Initialize MongoDB container. + + Args: + image: MongoDB Docker image + port: MongoDB port (default 27017) + username: Database username + password: Database password + dbname: Database name + **kwargs: Additional container options + """ + + def get_connection_url(self) -> str: + """ + Get MongoDB connection URL. + + Returns: + MongoDB connection URL string + """ + + def get_connection_client(self): + """ + Get configured PyMongo client. + + Returns: + PyMongo MongoClient instance + """ +``` + +### ClickHouse Container + +ClickHouse analytical database container optimized for OLAP workloads with configurable users and databases. + +```python { .api } +class ClickHouseContainer: + def __init__( + self, + image: str = "clickhouse/clickhouse-server:latest", + port: int = 8123, + username: str = "default", + password: str = "", + dbname: str = "default", + **kwargs: Any + ): + """ + Initialize ClickHouse container. + + Args: + image: ClickHouse Docker image + port: HTTP port (default 8123) + username: Database username + password: Database password + dbname: Database name + **kwargs: Additional container options + """ + + def get_connection_url(self) -> str: + """ + Get ClickHouse connection URL. + + Returns: + ClickHouse connection URL string + """ +``` + +### Neo4j Container + +Neo4j graph database container with authentication configuration and Bolt protocol support. 
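+
+A minimal connection sketch, assuming the separate `neo4j` Python driver is installed and the default credentials from the constructor documented below (`neo4j` / `test`):
+
+```python
+from testcontainers.neo4j import Neo4jContainer
+from neo4j import GraphDatabase  # assumed test dependency, not part of testcontainers
+
+with Neo4jContainer("neo4j:5") as neo4j_container:
+    bolt_url = neo4j_container.get_connection_url()
+    # Credentials follow the constructor defaults shown below
+    driver = GraphDatabase.driver(bolt_url, auth=("neo4j", "test"))
+    with driver.session() as session:
+        message = session.run("RETURN 'hello from Neo4j' AS message").single()["message"]
+        print(message)
+    driver.close()
+```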
+ +```python { .api } +class Neo4jContainer: + def __init__( + self, + image: str = "neo4j:latest", + username: str = "neo4j", + password: str = "test", + **kwargs: Any + ): + """ + Initialize Neo4j container. + + Args: + image: Neo4j Docker image + username: Database username + password: Database password + **kwargs: Additional container options + """ + + def get_connection_url(self) -> str: + """ + Get Neo4j Bolt connection URL. + + Returns: + Neo4j Bolt connection URL string + """ +``` + +### Cassandra Container + +Apache Cassandra NoSQL wide-column database container with cluster configuration support. + +```python { .api } +class CassandraContainer: + def __init__( + self, + image: str = "cassandra:latest", + **kwargs: Any + ): + """ + Initialize Cassandra container. + + Args: + image: Cassandra Docker image + **kwargs: Additional container options + """ + + def get_connection_url(self) -> str: + """ + Get Cassandra connection URL. + + Returns: + Cassandra connection URL string + """ +``` + +### InfluxDB Container + +InfluxDB time-series database container with configurable authentication and database initialization. + +```python { .api } +class InfluxDbContainer: + def __init__( + self, + image: str = "influxdb:latest", + port: int = 8086, + username: Optional[str] = None, + password: Optional[str] = None, + dbname: Optional[str] = None, + **kwargs: Any + ): + """ + Initialize InfluxDB container. + + Args: + image: InfluxDB Docker image + port: InfluxDB port (default 8086) + username: Database username + password: Database password + dbname: Database name + **kwargs: Additional container options + """ + + def get_connection_url(self) -> str: + """ + Get InfluxDB connection URL. + + Returns: + InfluxDB connection URL string + """ +``` + +### Microsoft SQL Server Container + +Microsoft SQL Server relational database container with SA authentication and database configuration. + +```python { .api } +class SqlServerContainer: + def __init__( + self, + image: str = "mcr.microsoft.com/mssql/server:latest", + password: Optional[str] = None, + **kwargs: Any + ): + """ + Initialize SQL Server container. + + Args: + image: SQL Server Docker image + password: SA user password + **kwargs: Additional container options + """ + + def get_connection_url(self) -> str: + """ + Get SQL Server connection URL. + + Returns: + SQL Server connection URL string + """ +``` + +### Oracle Free Container + +Oracle Database Free (formerly Oracle XE) container for Oracle database testing. + +```python { .api } +class OracleDbContainer: + def __init__( + self, + image: str = "gvenzl/oracle-free:latest", + username: Optional[str] = None, + password: Optional[str] = None, + **kwargs: Any + ): + """ + Initialize Oracle Database container. + + Args: + image: Oracle Database Docker image + username: Database username + password: Database password + **kwargs: Additional container options + """ + + def get_connection_url(self) -> str: + """ + Get Oracle Database connection URL. + + Returns: + Oracle Database connection URL string + """ +``` + +### ArangoDB Container + +ArangoDB multi-model NoSQL database container supporting documents, graphs, and search. + +```python { .api } +class ArangoDbContainer: + def __init__( + self, + image: str = "arangodb:latest", + username: str = "root", + password: Optional[str] = None, + **kwargs: Any + ): + """ + Initialize ArangoDB container. 
+ + Args: + image: ArangoDB Docker image + username: Database username + password: Database password + **kwargs: Additional container options + """ + + def get_connection_url(self) -> str: + """ + Get ArangoDB connection URL. + + Returns: + ArangoDB connection URL string + """ +``` + +### DB2 Container + +IBM DB2 database container for enterprise database testing. + +```python { .api } +class Db2Container: + def __init__( + self, + image: str = "ibmcom/db2:latest", + username: Optional[str] = None, + password: Optional[str] = None, + dbname: Optional[str] = None, + **kwargs: Any + ): + """ + Initialize DB2 container. + + Args: + image: DB2 Docker image + username: Database username + password: Database password + dbname: Database name + **kwargs: Additional container options + """ + + def get_connection_url(self) -> str: + """ + Get DB2 connection URL. + + Returns: + DB2 connection URL string + """ +``` + +### Scylla Container + +Scylla high-performance Cassandra-compatible NoSQL database container. + +```python { .api } +class ScyllaContainer: + def __init__( + self, + image: str = "scylladb/scylla:latest", + **kwargs: Any + ): + """ + Initialize Scylla container. + + Args: + image: Scylla Docker image + **kwargs: Additional container options + """ + + def get_connection_url(self) -> str: + """ + Get Scylla connection URL. + + Returns: + Scylla connection URL string + """ +``` + +## Usage Examples + +### PostgreSQL Integration + +```python +from testcontainers.postgres import PostgresContainer +import psycopg2 + +with PostgresContainer("postgres:13") as postgres: + # Get connection details + connection_url = postgres.get_connection_url() + + # Connect using psycopg2 + conn = psycopg2.connect(connection_url) + cursor = conn.cursor() + + # Execute queries + cursor.execute("CREATE TABLE users (id SERIAL PRIMARY KEY, email VARCHAR(255))") + cursor.execute("INSERT INTO users (email) VALUES (%s)", ("test@example.com",)) + conn.commit() + + cursor.execute("SELECT * FROM users") + users = cursor.fetchall() + print(users) + + conn.close() +``` + +### MongoDB Integration + +```python +from testcontainers.mongodb import MongoDbContainer + +with MongoDbContainer("mongo:4.4") as mongo: + # Get MongoDB client + client = mongo.get_connection_client() + + # Use the database + db = client.test_database + collection = db.test_collection + + # Insert document + result = collection.insert_one({"name": "John", "age": 30}) + print(f"Inserted document ID: {result.inserted_id}") + + # Query documents + users = list(collection.find({"age": {"$gte": 18}})) + print(f"Found {len(users)} adult users") +``` + +### Multi-Database Setup + +```python +from testcontainers.postgres import PostgresContainer +from testcontainers.redis import RedisContainer +from testcontainers.core.network import Network + +# Create shared network for containers +with Network() as network: + # Start multiple databases + with PostgresContainer("postgres:13") as postgres, \ + RedisContainer("redis:6") as redis: + + postgres.with_network(network).with_network_aliases("postgres") + redis.with_network(network).with_network_aliases("redis") + + # Use both databases in your application + pg_url = postgres.get_connection_url() + redis_client = redis.get_client() + + # Application logic using both databases + print(f"PostgreSQL: {pg_url}") + print(f"Redis client: {redis_client}") +``` + +### Custom Database Configuration + +```python +from testcontainers.mysql import MySqlContainer + +# Custom MySQL configuration +mysql = MySqlContainer("mysql:8.0") 
\ + .with_env("MYSQL_ROOT_PASSWORD", "rootpass") \ + .with_env("MYSQL_DATABASE", "app_db") \ + .with_env("MYSQL_USER", "app_user") \ + .with_env("MYSQL_PASSWORD", "app_pass") \ + .with_volume_mapping("./init.sql", "/docker-entrypoint-initdb.d/init.sql", "ro") + +with mysql: + connection_url = mysql.get_connection_url() + print(f"MySQL connection: {connection_url}") +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-testcontainers/docs/index.md b/.tessl/tiles/tessl/pypi-testcontainers/docs/index.md new file mode 100644 index 0000000..28166af --- /dev/null +++ b/.tessl/tiles/tessl/pypi-testcontainers/docs/index.md @@ -0,0 +1,395 @@ +# Testcontainers + +A comprehensive Python library for managing throwaway Docker instances in tests. Testcontainers enables developers to create isolated, reproducible test environments by programmatically spinning up Docker containers for databases, message queues, web services, and other infrastructure components during test execution. + +## Package Information + +- **Package Name**: testcontainers +- **Language**: Python +- **Installation**: `pip install testcontainers` +- **Python Support**: 3.9+ + +## Core Imports + +```python +from testcontainers.core.container import DockerContainer +from testcontainers.core.network import Network +from testcontainers.core.waiting_utils import wait_for_logs, wait_for +``` + +For specialized containers: + +```python +from testcontainers.postgres import PostgresContainer +from testcontainers.redis import RedisContainer +from testcontainers.mysql import MySqlContainer +from testcontainers.mongodb import MongoDbContainer +from testcontainers.kafka import KafkaContainer +from testcontainers.elasticsearch import ElasticSearchContainer +``` + +For compose orchestration: + +```python +from testcontainers.compose import DockerCompose +``` + +For configuration and exceptions: + +```python +from testcontainers.core.config import testcontainers_config +from testcontainers.core.exceptions import ( + ContainerStartException, + ContainerConnectException, + NoSuchPortExposed +) +``` + +## Basic Usage + +```python +from testcontainers.core.container import DockerContainer +from testcontainers.core.waiting_utils import wait_for_logs + +# Basic container usage with context manager +with DockerContainer("hello-world") as container: + delay = wait_for_logs(container, "Hello from Docker!") + +# Database container example +from testcontainers.postgres import PostgresContainer + +with PostgresContainer("postgres:13") as postgres: + # Container automatically configured with database, user, password + connection_url = postgres.get_connection_url() + + # Use with your database client + import psycopg2 + conn = psycopg2.connect(connection_url) + cursor = conn.cursor() + cursor.execute("SELECT version();") + result = cursor.fetchone() + print(result) + +# Compose orchestration example +from testcontainers.compose import DockerCompose + +with DockerCompose(".", compose_file_name="docker-compose.test.yml") as compose: + # Get specific service container + web_container = compose.get_container("web") + db_container = compose.get_container("db") + + # Get service endpoints + web_url = compose.get_service_host("web", 80) + db_host = compose.get_service_host("db", 5432) +``` + +## Architecture + +Testcontainers follows a layered architecture enabling flexible container management: + +- **DockerContainer**: Core container abstraction with fluent configuration API +- **Specialized Containers**: Pre-configured containers for specific services 
(databases, caches, etc.) +- **Compose Integration**: Full Docker Compose orchestration support +- **Waiting Strategies**: Robust container readiness detection +- **Resource Management**: Automatic cleanup via Ryuk or manual lifecycle control + +This design provides both simplicity for common use cases and extensibility for complex testing scenarios, integrating seamlessly with pytest, unittest, and other Python testing frameworks. + +## Capabilities + +### Core Container Management + +Fundamental Docker container lifecycle management with comprehensive configuration options, networking, volume mounting, and environment setup. Provides the foundation for all specialized containers. + +```python { .api } +class DockerContainer: + def __init__( + self, + image: str, + docker_client_kw: Optional[dict] = None, + command: Optional[str] = None, + env: Optional[dict] = None, + name: Optional[str] = None, + ports: Optional[list] = None, + volumes: Optional[list] = None, + network: Optional[Network] = None, + network_aliases: Optional[list] = None, + **kwargs + ): ... + + def start(self) -> "DockerContainer": ... + def stop(self, force: bool = True, delete_volume: bool = True) -> None: ... + def get_exposed_port(self, port: int) -> str: ... + def get_container_host_ip(self) -> str: ... + def with_env(self, key: str, value: str) -> "DockerContainer": ... + def with_exposed_ports(self, *ports: int) -> "DockerContainer": ... + def with_bind_ports(self, container: int, host: Optional[int] = None) -> "DockerContainer": ... + def with_volume_mapping(self, host: str, container: str, mode: str = "ro") -> "DockerContainer": ... +``` + +[Core Container Management](./core-containers.md) + +### Database Containers + +Pre-configured containers for popular databases including PostgreSQL, MySQL, MongoDB, Redis, and many others. Each provides service-specific configuration options and connection utilities. + +```python { .api } +class PostgresContainer: + def __init__( + self, + image: str = "postgres:latest", + port: int = 5432, + username: Optional[str] = None, + password: Optional[str] = None, + dbname: Optional[str] = None, + **kwargs + ): ... + def get_connection_url(self, host: Optional[str] = None) -> str: ... + +class MySqlContainer: + def __init__( + self, + image: str = "mysql:latest", + username: Optional[str] = None, + password: Optional[str] = None, + dbname: Optional[str] = None, + **kwargs + ): ... + def get_connection_url(self) -> str: ... + +class MongoDbContainer: + def __init__( + self, + image: str = "mongo:latest", + port: int = 27017, + username: Optional[str] = None, + password: Optional[str] = None, + **kwargs + ): ... + def get_connection_url(self) -> str: ... + def get_connection_client(self): ... +``` + +[Database Containers](./database-containers.md) + +### Cache and Messaging Containers + +Containers for caching systems, message queues, and pub/sub services including Redis, Kafka, RabbitMQ, NATS, and messaging brokers with client integration. + +```python { .api } +class RedisContainer: + def __init__( + self, + image: str = "redis:latest", + port: int = 6379, + password: Optional[str] = None, + **kwargs + ): ... + def get_client(self, **kwargs): ... + +class KafkaContainer: + def __init__( + self, + image: str = "confluentinc/cp-kafka:7.6.0", + port: int = 9093, + **kwargs + ): ... + def get_bootstrap_server(self) -> str: ... + def with_kraft(self) -> "KafkaContainer": ... 
+``` + +[Cache and Messaging](./cache-messaging.md) + +### Docker Compose Orchestration + +Complete Docker Compose integration for managing multi-container environments, service discovery, and complex application stacks during testing. + +```python { .api } +class DockerCompose: + def __init__( + self, + context: str, + compose_file_name: Optional[str] = None, + pull: bool = False, + build: bool = False, + wait: bool = True, + **kwargs + ): ... + + def start(self) -> "DockerCompose": ... + def stop(self, down: bool = True) -> None: ... + def get_container(self, service_name: str) -> ComposeContainer: ... + def get_service_host(self, service_name: str, port: int) -> str: ... + def get_service_port(self, service_name: str, port: int) -> int: ... + def exec_in_container(self, command: str, service_name: str): ... +``` + +[Docker Compose](./compose.md) + +### Waiting Strategies and Utilities + +Robust container readiness detection, log monitoring, and condition waiting utilities for reliable test execution across different container types and startup behaviors. + +```python { .api } +def wait_container_is_ready(*transient_exceptions) -> Callable: ... + +def wait_for_logs( + container: DockerContainer, + predicate: Union[str, Callable], + timeout: float = 120, + interval: float = 1, + **kwargs +) -> float: ... + +def wait_for(condition: Callable, timeout: float = 120, interval: float = 1) -> bool: ... +``` + +[Waiting Strategies](./waiting-strategies.md) + +### Search and Analytics Containers + +Specialized containers for search engines, analytics platforms, and data processing including Elasticsearch, OpenSearch, ClickHouse, and vector databases. + +```python { .api } +class ElasticSearchContainer: + def __init__(self, image: str = "elasticsearch", port: int = 9200, **kwargs): ... + def get_url(self) -> str: ... + +class ClickHouseContainer: + def __init__(self, image: str = "clickhouse/clickhouse-server", **kwargs): ... + def get_connection_url(self) -> str: ... +``` + +[Search and Analytics](./search-analytics.md) + +### Cloud Services Integration + +Containers for cloud service emulation and integration including LocalStack for AWS services, Azure emulators, and Google Cloud Platform services for local development and testing. + +```python { .api } +class LocalStackContainer: + def __init__( + self, + image: str = "localstack/localstack:2.0.1", + edge_port: int = 4566, + **kwargs + ): ... + def with_services(self, *services: str) -> "LocalStackContainer": ... + def get_url(self) -> str: ... + def get_client(self, name: str, **kwargs): ... +``` + +[Cloud Services](./cloud-services.md) + +### Web and Testing Containers + +Containers for web services, browser automation, and testing infrastructure including Nginx, Selenium WebDriver, and specialized testing utilities. + +```python { .api } +class BrowserWebDriverContainer: + def __init__( + self, + capabilities: dict, + image: Optional[str] = None, + port: int = 4444, + **kwargs + ): ... + def get_driver(self): ... + def get_connection_url(self) -> str: ... + def with_options(self, options) -> "BrowserWebDriverContainer": ... + +class NginxContainer: + def __init__(self, image: str = "nginx:alpine", port: int = 80, **kwargs): ... + def get_url(self) -> str: ... +``` + +[Web and Testing](./web-testing.md) + +### Additional Service Containers + +Additional specialized containers for various development and testing needs. 
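+
+For instance, a Vault container can be probed through Vault's standard unauthenticated health endpoint (a minimal sketch; `requests` is an assumed test dependency):
+
+```python
+from testcontainers.vault import VaultContainer
+import requests
+
+with VaultContainer() as vault:
+    base_url = vault.get_url()
+    # /v1/sys/health is Vault's unauthenticated health endpoint
+    health = requests.get(f"{base_url}/v1/sys/health").json()
+    print(f"initialized={health.get('initialized')}, sealed={health.get('sealed')}")
+```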
+ +```python { .api } +class VaultContainer: + def __init__(self, image: str = "vault:latest", port: int = 8200, **kwargs): ... + def get_url(self) -> str: ... + +class MailpitContainer: + def __init__(self, image: str = "axllent/mailpit:latest", **kwargs): ... + def get_smtp_host(self) -> str: ... + def get_web_url(self) -> str: ... + +class OllamaContainer: + def __init__(self, image: str = "ollama/ollama:latest", **kwargs): ... + def get_endpoint_url(self) -> str: ... + +class SftpContainer: + def __init__(self, image: str = "atmoz/sftp:latest", **kwargs): ... + def get_connection_url(self) -> str: ... +``` + +## Configuration and Error Handling + +### Global Configuration + +```python { .api } +from testcontainers.core.config import testcontainers_config + +# Configuration properties +testcontainers_config.max_tries: int +testcontainers_config.sleep_time: int +testcontainers_config.timeout: int +testcontainers_config.ryuk_disabled: bool +``` + +### Exception Types + +```python { .api } +class ContainerStartException(RuntimeError): ... +class ContainerConnectException(RuntimeError): ... +class ContainerIsNotRunning(RuntimeError): ... +class NoSuchPortExposed(RuntimeError): ... +``` + +## Available Container Modules + +The library includes 45+ specialized container modules providing pre-configured containers for popular services: + +**Databases**: postgres, mysql, mongodb, redis, cassandra, clickhouse, cockroachdb, cosmosdb, db2, influxdb, mssql, neo4j, oracle-free, scylla, trino + +**Vector Databases**: chroma, milvus, qdrant, weaviate + +**Message Brokers**: kafka, mqtt, nats, rabbitmq + +**Search & Analytics**: elasticsearch, opensearch + +**Storage & Cache**: azurite, memcached, minio, registry, vault + +**Development Tools**: generic, keycloak, k3s, mailpit, nginx, ollama, selenium, sftp + +**Cloud Services**: aws, google, localstack, openfga + +Each specialized container follows similar patterns with service-specific configuration methods and client getters appropriate for the service type. + +## Exception Types + +```python { .api } +from testcontainers.core.exceptions import ( + ContainerStartException, + ContainerConnectException, + ContainerIsNotRunning, + NoSuchPortExposed +) + +class ContainerStartException(RuntimeError): + """Raised when container fails to start properly.""" + +class ContainerConnectException(RuntimeError): + """Raised when connection to container fails.""" + +class ContainerIsNotRunning(RuntimeError): + """Raised when operation requires running container but container is not running.""" + +class NoSuchPortExposed(RuntimeError): + """Raised when trying to access a port that was not exposed.""" +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-testcontainers/docs/search-analytics.md b/.tessl/tiles/tessl/pypi-testcontainers/docs/search-analytics.md new file mode 100644 index 0000000..2f630f4 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-testcontainers/docs/search-analytics.md @@ -0,0 +1,534 @@ +# Search and Analytics Containers + +Specialized containers for search engines, analytics platforms, and data processing including Elasticsearch, OpenSearch, ClickHouse, and vector databases for full-text search, analytics, and AI/ML workloads. + +## Capabilities + +### Elasticsearch Container + +Elasticsearch distributed search and analytics engine container with configurable cluster settings and security options. 
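+
+A quick readiness probe against the cluster health endpoint can look like this (a sketch assuming the container's default configuration serves plain HTTP without authentication; a fuller client-based example appears in the usage section below):
+
+```python
+from testcontainers.elasticsearch import ElasticSearchContainer
+import requests
+
+with ElasticSearchContainer("elasticsearch:8.8.0") as es:
+    url = es.get_url()
+    # _cluster/health reports overall cluster status (green/yellow/red)
+    health = requests.get(f"{url}/_cluster/health").json()
+    print(f"cluster status: {health.get('status')}")
+```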
+ +```python { .api } +class ElasticSearchContainer: + def __init__( + self, + image: str = "elasticsearch:8.8.0", + port: int = 9200, + **kwargs: Any + ): + """ + Initialize Elasticsearch container. + + Args: + image: Elasticsearch Docker image + port: HTTP port (default 9200) + **kwargs: Additional container options + """ + + def get_url(self) -> str: + """ + Get Elasticsearch HTTP URL. + + Returns: + Elasticsearch HTTP URL string + """ +``` + +### OpenSearch Container + +OpenSearch distributed search and analytics engine container with dashboard support and security configuration. + +```python { .api } +class OpenSearchContainer: + def __init__( + self, + image: str = "opensearchproject/opensearch:latest", + port: int = 9200, + **kwargs: Any + ): + """ + Initialize OpenSearch container. + + Args: + image: OpenSearch Docker image + port: HTTP port (default 9200) + **kwargs: Additional container options + """ + + def get_url(self) -> str: + """ + Get OpenSearch HTTP URL. + + Returns: + OpenSearch HTTP URL string + """ +``` + +### Vector Database Containers + +Modern vector databases for similarity search, embeddings, and AI/ML applications. + +```python { .api } +class ChromaContainer: + def __init__( + self, + image: str = "chromadb/chroma:latest", + port: int = 8000, + **kwargs: Any + ): + """ + Initialize Chroma vector database container. + + Args: + image: Chroma Docker image + port: HTTP port (default 8000) + **kwargs: Additional container options + """ + + def get_url(self) -> str: + """ + Get Chroma HTTP URL. + + Returns: + Chroma HTTP URL string + """ + +class WeaviateContainer: + def __init__( + self, + image: str = "semitechnologies/weaviate:latest", + port: int = 8080, + **kwargs: Any + ): + """ + Initialize Weaviate vector database container. + + Args: + image: Weaviate Docker image + port: HTTP port (default 8080) + **kwargs: Additional container options + """ + + def get_url(self) -> str: + """ + Get Weaviate HTTP URL. + + Returns: + Weaviate HTTP URL string + """ + +class QdrantContainer: + def __init__( + self, + image: str = "qdrant/qdrant:latest", + port: int = 6333, + **kwargs: Any + ): + """ + Initialize Qdrant vector database container. + + Args: + image: Qdrant Docker image + port: HTTP port (default 6333) + **kwargs: Additional container options + """ + + def get_url(self) -> str: + """ + Get Qdrant HTTP URL. + + Returns: + Qdrant HTTP URL string + """ + +class MilvusContainer: + def __init__( + self, + image: str = "milvusdb/milvus:latest", + port: int = 19530, + **kwargs: Any + ): + """ + Initialize Milvus vector database container. + + Args: + image: Milvus Docker image + port: gRPC port (default 19530) + **kwargs: Additional container options + """ + + def get_connection_args(self) -> dict: + """ + Get Milvus connection arguments. + + Returns: + Dictionary with host and port for Milvus client + """ +``` + +### Analytics Database Containers + +High-performance analytics and columnar databases for OLAP workloads. + +```python { .api } +class ClickHouseContainer: + def __init__( + self, + image: str = "clickhouse/clickhouse-server:latest", + port: int = 8123, + username: str = "default", + password: str = "", + dbname: str = "default", + **kwargs: Any + ): + """ + Initialize ClickHouse container. 
+ + Args: + image: ClickHouse Docker image + port: HTTP port (default 8123) + username: Database username + password: Database password + dbname: Database name + **kwargs: Additional container options + """ + + def get_connection_url(self) -> str: + """ + Get ClickHouse connection URL. + + Returns: + ClickHouse connection URL string + """ + +class TrinoContainer: + def __init__( + self, + image: str = "trinodb/trino:latest", + port: int = 8080, + **kwargs: Any + ): + """ + Initialize Trino distributed query engine container. + + Args: + image: Trino Docker image + port: HTTP port (default 8080) + **kwargs: Additional container options + """ + + def get_connection_url(self) -> str: + """ + Get Trino connection URL. + + Returns: + Trino connection URL string + """ +``` + +## Usage Examples + +### Elasticsearch Full-Text Search + +```python +from testcontainers.elasticsearch import ElasticSearchContainer +from elasticsearch import Elasticsearch + +with ElasticSearchContainer("elasticsearch:8.8.0") as es_container: + # Get Elasticsearch client + es_url = es_container.get_url() + es_client = Elasticsearch([es_url]) + + # Wait for cluster to be ready + es_client.cluster.health(wait_for_status="yellow", timeout="30s") + + # Create an index + index_name = "test_index" + es_client.indices.create(index=index_name, ignore=400) + + # Index some documents + documents = [ + {"title": "Elasticsearch Guide", "content": "Learn about search and analytics"}, + {"title": "Python Testing", "content": "Unit testing with containers"}, + {"title": "Data Analytics", "content": "Big data processing and analysis"} + ] + + for i, doc in enumerate(documents, 1): + es_client.index(index=index_name, id=i, body=doc) + + # Refresh index + es_client.indices.refresh(index=index_name) + + # Search documents + search_query = { + "query": { + "match": { + "content": "analytics" + } + } + } + + results = es_client.search(index=index_name, body=search_query) + print(f"Found {results['hits']['total']['value']} matching documents") + + for hit in results['hits']['hits']: + print(f"- {hit['_source']['title']}: {hit['_score']}") +``` + +### Vector Database with Chroma + +```python +from testcontainers.chroma import ChromaContainer +import chromadb +import numpy as np + +with ChromaContainer() as chroma_container: + # Get Chroma client + chroma_url = chroma_container.get_url() + client = chromadb.HttpClient(host=chroma_url.split("://")[1].split(":")[0], + port=int(chroma_url.split(":")[2])) + + # Create collection + collection = client.create_collection("test_collection") + + # Add embeddings + embeddings = [ + [0.1, 0.2, 0.3, 0.4], + [0.5, 0.6, 0.7, 0.8], + [0.9, 0.1, 0.2, 0.3] + ] + + documents = [ + "First document about AI", + "Second document about machine learning", + "Third document about data science" + ] + + ids = ["doc1", "doc2", "doc3"] + + collection.add( + embeddings=embeddings, + documents=documents, + ids=ids + ) + + # Query similar vectors + query_embedding = [0.1, 0.25, 0.35, 0.45] + results = collection.query( + query_embeddings=[query_embedding], + n_results=2 + ) + + print("Similar documents:") + for i, doc in enumerate(results['documents'][0]): + distance = results['distances'][0][i] + print(f"- {doc} (distance: {distance:.4f})") +``` + +### ClickHouse Analytics + +```python +from testcontainers.clickhouse import ClickHouseContainer +import clickhouse_driver + +with ClickHouseContainer() as clickhouse: + # Connect to ClickHouse + connection_url = clickhouse.get_connection_url() + client = 
clickhouse_driver.Client.from_url(connection_url) + + # Create table for analytics + client.execute(""" + CREATE TABLE IF NOT EXISTS events ( + timestamp DateTime, + user_id UInt32, + event_type String, + value Float64 + ) ENGINE = MergeTree() + ORDER BY timestamp + """) + + # Insert sample data + import datetime + import random + + events_data = [] + base_time = datetime.datetime.now() + + for i in range(1000): + events_data.append(( + base_time + datetime.timedelta(minutes=i), + random.randint(1, 100), + random.choice(['click', 'view', 'purchase']), + random.uniform(1.0, 100.0) + )) + + client.execute( + "INSERT INTO events (timestamp, user_id, event_type, value) VALUES", + events_data + ) + + # Run analytics queries + # Daily event counts + daily_stats = client.execute(""" + SELECT + toDate(timestamp) as date, + event_type, + count() as events, + sum(value) as total_value + FROM events + GROUP BY date, event_type + ORDER BY date, event_type + """) + + print("Daily event statistics:") + for date, event_type, count, total in daily_stats: + print(f"{date} {event_type}: {count} events, total value: {total:.2f}") + + # Top users by activity + top_users = client.execute(""" + SELECT + user_id, + count() as activity_count, + sum(value) as total_value + FROM events + GROUP BY user_id + ORDER BY activity_count DESC + LIMIT 5 + """) + + print("\nTop users by activity:") + for user_id, count, total in top_users: + print(f"User {user_id}: {count} events, total value: {total:.2f}") +``` + +### Multi-Engine Search Setup + +```python +from testcontainers.elasticsearch import ElasticSearchContainer +from testcontainers.opensearch import OpenSearchContainer +from testcontainers.chroma import ChromaContainer +from testcontainers.core.network import Network + +# Create network for search engines +with Network() as network: + # Start multiple search engines + with ElasticSearchContainer() as elasticsearch, \ + OpenSearchContainer() as opensearch, \ + ChromaContainer() as chroma: + + # Connect to network + elasticsearch.with_network(network).with_network_aliases("elasticsearch") + opensearch.with_network(network).with_network_aliases("opensearch") + chroma.with_network(network).with_network_aliases("chroma") + + # Get service URLs + es_url = elasticsearch.get_url() + os_url = opensearch.get_url() + chroma_url = chroma.get_url() + + print(f"Elasticsearch: {es_url}") + print(f"OpenSearch: {os_url}") + print(f"Chroma: {chroma_url}") + + # Use multiple search engines for different use cases + # Elasticsearch for structured search + # OpenSearch for log analytics + # Chroma for vector similarity search +``` + +### Trino Distributed Query Engine + +```python +from testcontainers.trino import TrinoContainer +import trino + +with TrinoContainer() as trino_container: + connection_url = trino_container.get_connection_url() + + # Connect to Trino + conn = trino.dbapi.connect( + host=connection_url.split("://")[1].split(":")[0], + port=int(connection_url.split(":")[2]), + user="test" + ) + + cursor = conn.cursor() + + # Query information schema + cursor.execute("SHOW CATALOGS") + catalogs = cursor.fetchall() + print("Available catalogs:") + for catalog in catalogs: + print(f"- {catalog[0]}") + + # Create memory table for testing + cursor.execute(""" + CREATE TABLE memory.default.sales AS + SELECT * FROM (VALUES + ('2023-01-01', 'Product A', 100.0), + ('2023-01-02', 'Product B', 150.0), + ('2023-01-03', 'Product A', 200.0) + ) AS t(date, product, amount) + """) + + # Query the data + cursor.execute(""" + SELECT product, 
sum(amount) as total_sales + FROM memory.default.sales + GROUP BY product + ORDER BY total_sales DESC + """) + + results = cursor.fetchall() + print("\nSales by product:") + for product, total in results: + print(f"{product}: ${total}") +``` + +### Vector Similarity Search Comparison + +```python +from testcontainers.chroma import ChromaContainer +from testcontainers.weaviate import WeaviateContainer +from testcontainers.qdrant import QdrantContainer +import numpy as np + +# Generate sample embeddings +def generate_embeddings(n_docs=100, dim=384): + """Generate random embeddings for testing.""" + return np.random.random((n_docs, dim)).tolist() + +embeddings = generate_embeddings() +documents = [f"Document {i}" for i in range(len(embeddings))] + +# Test with multiple vector databases +with ChromaContainer() as chroma, \ + WeaviateContainer() as weaviate, \ + QdrantContainer() as qdrant: + + print("Testing vector similarity search across databases...") + + # Chroma setup + import chromadb + chroma_client = chromadb.HttpClient(host="localhost", port=8000) # Simplified + chroma_collection = chroma_client.create_collection("test") + chroma_collection.add( + embeddings=embeddings, + documents=documents, + ids=[str(i) for i in range(len(documents))] + ) + + # Query all databases with same vector + query_vector = embeddings[0] # Use first document as query + + # Chroma query + chroma_results = chroma_collection.query( + query_embeddings=[query_vector], + n_results=5 + ) + + print(f"Chroma found {len(chroma_results['documents'][0])} similar documents") + + # Compare performance and results + print("Vector database comparison complete") +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-testcontainers/docs/waiting-strategies.md b/.tessl/tiles/tessl/pypi-testcontainers/docs/waiting-strategies.md new file mode 100644 index 0000000..8719694 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-testcontainers/docs/waiting-strategies.md @@ -0,0 +1,380 @@ +# Waiting Strategies and Utilities + +Robust container readiness detection, log monitoring, and condition waiting utilities for reliable test execution across different container types and startup behaviors. Essential for ensuring containers are fully ready before test execution begins. + +## Capabilities + +### Container Readiness Decorator + +Decorator for automatic retry logic when connecting to containers, handling transient errors and ensuring reliable container readiness detection. + +```python { .api } +def wait_container_is_ready(*transient_exceptions: type[BaseException]) -> Callable: + """ + Decorator for container readiness checks with retry logic. + + Automatically retries decorated function until success or timeout. + Handles common transient exceptions plus any additional specified exceptions. + + Args: + *transient_exceptions: Additional exception types to treat as transient + + Returns: + Decorator function that wraps the target method + + Usage: + @wait_container_is_ready(CustomException) + def _connect(self): + # Connection logic that may fail transiently + pass + """ +``` + +### Log-Based Waiting + +Wait for specific log output to appear in container logs, supporting both string patterns and custom predicates. + +```python { .api } +def wait_for_logs( + container: DockerContainer, + predicate: Union[Callable[..., bool], str], + timeout: Union[float, None] = None, + interval: float = 1, + predicate_streams_and: bool = False, + raise_on_exit: bool = False +) -> float: + """ + Wait for specific log output from container. 
+ + Args: + container: Container to monitor + predicate: String to search for or callable returning bool + timeout: Maximum wait time in seconds + interval: Polling interval in seconds + predicate_streams_and: Apply predicate to both stdout and stderr + raise_on_exit: Raise exception if container exits + + Returns: + Time elapsed until condition was met + + Raises: + TimeoutError: If timeout reached without condition being met + ContainerStartException: If container exits unexpectedly + """ +``` + +### Generic Condition Waiting + +Wait for arbitrary conditions to be met with configurable timeout and polling intervals. + +```python { .api } +def wait_for( + condition: Callable[[], bool], + timeout: float = 120, + interval: float = 1 +) -> bool: + """ + Wait for generic condition to be met. + + Args: + condition: Function returning True when condition is met + timeout: Maximum wait time in seconds + interval: Polling interval in seconds + + Returns: + True if condition was met, False if timeout reached + """ +``` + +## Configuration + +### Global Timeout Settings + +Container readiness waiting behavior is controlled by global configuration: + +```python { .api } +from testcontainers.core.config import testcontainers_config + +# Configure waiting behavior +testcontainers_config.max_tries: int = 120 # Maximum retry attempts +testcontainers_config.sleep_time: int = 1 # Sleep between retries (seconds) +testcontainers_config.timeout: int = 120 # Total timeout (seconds) +``` + +### Transient Exceptions + +Default transient exceptions that trigger automatic retries: + +```python { .api } +TRANSIENT_EXCEPTIONS = (TimeoutError, ConnectionError) +``` + +## Usage Examples + +### Basic Log Waiting + +```python +from testcontainers.core.container import DockerContainer +from testcontainers.core.waiting_utils import wait_for_logs + +# Wait for application startup message +with DockerContainer("my-app:latest") as container: + # Wait for specific log message indicating readiness + delay = wait_for_logs(container, "Server started successfully") + print(f"Application ready after {delay:.2f} seconds") + + # Now safe to connect to the application + app_port = container.get_exposed_port(8080) + # Make requests to the application... 
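    # For example (hypothetical endpoint; assumes the image serves HTTP on port 8080):
    # import requests
    # host = container.get_container_host_ip()
    # response = requests.get(f"http://{host}:{app_port}/health", timeout=5)
    # assert response.status_code == 200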
+``` + +### Pattern Matching in Logs + +```python +from testcontainers.core.container import DockerContainer +from testcontainers.core.waiting_utils import wait_for_logs +import re + +with DockerContainer("postgres:13") as postgres: + postgres.with_env("POSTGRES_PASSWORD", "test") + + # Wait for PostgreSQL to be ready using regex pattern + def postgres_ready(log_line): + return re.search(r"database system is ready to accept connections", log_line) is not None + + delay = wait_for_logs(postgres, postgres_ready, timeout=30) + print(f"PostgreSQL ready after {delay:.2f} seconds") +``` + +### Custom Condition Waiting + +```python +from testcontainers.redis import RedisContainer +from testcontainers.core.waiting_utils import wait_for +import redis +import time + +with RedisContainer() as redis_container: + redis_client = redis_container.get_client() + + # Wait for Redis to accept connections + def redis_ready(): + try: + return redis_client.ping() + except: + return False + + success = wait_for(redis_ready, timeout=30, interval=0.5) + if success: + print("Redis is ready for connections") + else: + print("Redis failed to become ready within timeout") +``` + +### HTTP Endpoint Waiting + +```python +from testcontainers.core.container import DockerContainer +from testcontainers.core.waiting_utils import wait_for +import requests + +with DockerContainer("nginx:alpine") as web_server: + web_server.with_exposed_ports(80) + + host = web_server.get_container_host_ip() + port = web_server.get_exposed_port(80) + + # Wait for HTTP endpoint to respond + def http_ready(): + try: + response = requests.get(f"http://{host}:{port}/", timeout=1) + return response.status_code == 200 + except: + return False + + if wait_for(http_ready, timeout=60, interval=2): + print("Web server is responding to HTTP requests") + # Proceed with tests... 
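        # For instance, fetch the default page (nginx:alpine serves a welcome page at /):
        # body = requests.get(f"http://{host}:{port}/").text
        # assert "nginx" in body.lower()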
+``` + +### Database Connection Waiting + +```python +from testcontainers.postgres import PostgresContainer +from testcontainers.core.waiting_utils import wait_container_is_ready +import psycopg2 + +class CustomPostgresContainer(PostgresContainer): + @wait_container_is_ready(psycopg2.OperationalError) + def _connect(self): + """Custom connection method with automatic retry.""" + conn = psycopg2.connect(self.get_connection_url()) + cursor = conn.cursor() + cursor.execute("SELECT 1") + cursor.fetchone() + conn.close() + +# Use custom container with automatic connection retry +with CustomPostgresContainer("postgres:13") as postgres: + # Container automatically waits for successful connection + connection_url = postgres.get_connection_url() + print(f"PostgreSQL ready at: {connection_url}") +``` + +### Complex Readiness Checking + +```python +from testcontainers.core.container import DockerContainer +from testcontainers.core.waiting_utils import wait_for_logs, wait_for +import requests +import time + +class WebAppContainer(DockerContainer): + def __init__(self, image): + super().__init__(image) + self.with_exposed_ports(8080) + + def wait_for_readiness(self): + """Wait for multiple readiness conditions.""" + # First, wait for application startup logs + wait_for_logs(self, "Application started", timeout=60) + + # Then wait for health endpoint to respond + host = self.get_container_host_ip() + port = self.get_exposed_port(8080) + + def health_check(): + try: + response = requests.get(f"http://{host}:{port}/health", timeout=2) + return response.status_code == 200 and response.json().get("status") == "healthy" + except: + return False + + if not wait_for(health_check, timeout=30): + raise Exception("Application failed health check") + + print("Application is fully ready") + +# Use comprehensive readiness checking +with WebAppContainer("my-web-app:latest") as app: + app.wait_for_readiness() + # Application is now fully ready for testing +``` + +### Waiting with Custom Timeouts + +```python +from testcontainers.core.container import DockerContainer +from testcontainers.core.waiting_utils import wait_for_logs + +# Different containers may need different timeout strategies +containers = [ + ("redis:6", "Ready to accept connections", 15), + ("postgres:13", "database system is ready", 45), + ("elasticsearch:7.15.0", "started", 120) +] + +for image, log_pattern, timeout in containers: + with DockerContainer(image) as container: + try: + delay = wait_for_logs(container, log_pattern, timeout=timeout) + print(f"{image} ready after {delay:.2f}s") + except TimeoutError: + print(f"{image} failed to start within {timeout}s") + # Handle timeout appropriately +``` + +### Parallel Container Startup + +```python +from testcontainers.postgres import PostgresContainer +from testcontainers.redis import RedisContainer +from testcontainers.core.waiting_utils import wait_for +import threading +import time + +def start_and_wait(container, name): + """Start container and wait for readiness.""" + container.start() + + # Different waiting strategies per container type + if isinstance(container, PostgresContainer): + def pg_ready(): + try: + import psycopg2 + conn = psycopg2.connect(container.get_connection_url()) + conn.close() + return True + except: + return False + wait_for(pg_ready, timeout=45) + + elif isinstance(container, RedisContainer): + client = container.get_client() + wait_for(lambda: client.ping(), timeout=15) + + print(f"{name} is ready") + +# Start containers in parallel +postgres = 
PostgresContainer("postgres:13") +redis = RedisContainer("redis:6") + +threads = [ + threading.Thread(target=start_and_wait, args=(postgres, "PostgreSQL")), + threading.Thread(target=start_and_wait, args=(redis, "Redis")) +] + +start_time = time.time() +for thread in threads: + thread.start() + +for thread in threads: + thread.join() + +print(f"All containers ready in {time.time() - start_time:.2f} seconds") + +# Clean up +postgres.stop() +redis.stop() +``` + +## Error Handling + +### Common Exceptions + +```python { .api } +from testcontainers.core.exceptions import ( + ContainerStartException, + ContainerConnectException, + TimeoutError +) + +try: + with DockerContainer("problematic-image") as container: + wait_for_logs(container, "ready", timeout=30) +except ContainerStartException: + print("Container failed to start") +except TimeoutError: + print("Container did not become ready within timeout") +except ContainerConnectException: + print("Failed to connect to container") +``` + +### Graceful Timeout Handling + +```python +from testcontainers.core.waiting_utils import wait_for +import logging + +def wait_with_fallback(condition, primary_timeout=60, fallback_timeout=30): + """Wait with fallback strategy.""" + try: + if wait_for(condition, timeout=primary_timeout): + return True + else: + logging.warning(f"Primary wait timed out after {primary_timeout}s, trying fallback") + return wait_for(condition, timeout=fallback_timeout, interval=0.1) + except Exception as e: + logging.error(f"Wait failed: {e}") + return False +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-testcontainers/docs/web-testing.md b/.tessl/tiles/tessl/pypi-testcontainers/docs/web-testing.md new file mode 100644 index 0000000..7ce359e --- /dev/null +++ b/.tessl/tiles/tessl/pypi-testcontainers/docs/web-testing.md @@ -0,0 +1,612 @@ +# Web and Testing Containers + +Containers for web services, browser automation, and testing infrastructure including Nginx, Selenium WebDriver, and specialized testing utilities for comprehensive web application testing. + +## Capabilities + +### Browser WebDriver Container + +Selenium browser container for Chrome and Firefox automation with VNC support and video recording capabilities. + +```python { .api } +class BrowserWebDriverContainer: + def __init__( + self, + capabilities: dict, + options: Optional[Any] = None, + image: Optional[str] = None, + port: int = 4444, + vnc_port: int = 5900, + **kwargs: Any + ): + """ + Initialize browser WebDriver container. + + Args: + capabilities: Selenium capabilities dictionary + options: Browser-specific options + image: Docker image (auto-selected if None) + port: Selenium Grid port (default 4444) + vnc_port: VNC port for remote viewing (default 5900) + **kwargs: Additional container options + """ + + def get_driver(self): + """ + Get configured WebDriver instance. + + Returns: + Selenium WebDriver instance + """ + + def get_connection_url(self) -> str: + """ + Get Selenium Grid connection URL. + + Returns: + Selenium Grid URL string + """ + + def with_options(self, options: Any) -> "BrowserWebDriverContainer": + """ + Set browser-specific options. + + Args: + options: Chrome/Firefox options object + + Returns: + Self for method chaining + """ + + def with_video(self, image: Optional[str] = None, video_path: Optional[str] = None) -> "BrowserWebDriverContainer": + """ + Enable video recording of browser session. 
+ + Args: + image: Video recorder image + video_path: Host path to save videos + + Returns: + Self for method chaining + """ +``` + +### Nginx Container + +Nginx web server container for serving static content, reverse proxy testing, and web server functionality. + +```python { .api } +class NginxContainer: + def __init__( + self, + image: str = "nginx:alpine", + port: int = 80, + **kwargs: Any + ): + """ + Initialize Nginx container. + + Args: + image: Nginx Docker image + port: HTTP port (default 80) + **kwargs: Additional container options + """ + + def get_url(self) -> str: + """ + Get Nginx server URL. + + Returns: + Nginx server URL string + """ +``` + +### Testing Utility Containers + +Specialized containers for testing scenarios and development utilities. + +```python { .api } +class MailpitContainer: + def __init__( + self, + image: str = "axllent/mailpit:latest", + smtp_port: int = 1025, + web_port: int = 8025, + **kwargs: Any + ): + """ + Initialize Mailpit email testing container. + + Args: + image: Mailpit Docker image + smtp_port: SMTP server port (default 1025) + web_port: Web interface port (default 8025) + **kwargs: Additional container options + """ + + def get_smtp_connection_url(self) -> str: + """ + Get SMTP connection URL. + + Returns: + SMTP connection URL string + """ + + def get_web_url(self) -> str: + """ + Get web interface URL. + + Returns: + Web interface URL string + """ + +class SftpContainer: + def __init__( + self, + image: str = "atmoz/sftp:latest", + port: int = 22, + username: str = "testuser", + password: str = "testpass", + **kwargs: Any + ): + """ + Initialize SFTP server container. + + Args: + image: SFTP Docker image + port: SFTP port (default 22) + username: SFTP username + password: SFTP password + **kwargs: Additional container options + """ + + def get_connection_url(self) -> str: + """ + Get SFTP connection URL. 
+ + Returns: + SFTP connection URL string + """ +``` + +## Usage Examples + +### Selenium Browser Automation + +```python +from testcontainers.selenium import BrowserWebDriverContainer +from selenium.webdriver.common.by import By +from selenium.webdriver.support.ui import WebDriverWait +from selenium.webdriver.support import expected_conditions as EC + +# Chrome browser automation +chrome_capabilities = { + "browserName": "chrome", + "browserVersion": "latest" +} + +with BrowserWebDriverContainer(chrome_capabilities) as chrome: + # Get WebDriver instance + driver = chrome.get_driver() + + try: + # Navigate to a website + driver.get("https://example.com") + + # Wait for page to load + wait = WebDriverWait(driver, 10) + title_element = wait.until( + EC.presence_of_element_located((By.TAG_NAME, "h1")) + ) + + # Interact with page + print(f"Page title: {driver.title}") + print(f"H1 text: {title_element.text}") + + # Take screenshot + driver.save_screenshot("example_page.png") + + # Find and click elements + links = driver.find_elements(By.TAG_NAME, "a") + print(f"Found {len(links)} links on the page") + + finally: + driver.quit() +``` + +### Firefox with Custom Options + +```python +from testcontainers.selenium import BrowserWebDriverContainer +from selenium.webdriver.firefox.options import Options + +# Configure Firefox options +firefox_options = Options() +firefox_options.add_argument("--headless") # Run in background +firefox_options.set_preference("network.http.pipelining", True) + +firefox_capabilities = { + "browserName": "firefox", + "browserVersion": "latest" +} + +with BrowserWebDriverContainer(firefox_capabilities) as firefox: + firefox.with_options(firefox_options) + + driver = firefox.get_driver() + + try: + # Test JavaScript execution + driver.get("data:text/html,
<div id='test'>Hello World</div>
") + + # Execute JavaScript + result = driver.execute_script("return document.getElementById('test').textContent;") + print(f"JavaScript result: {result}") + + # Test page performance + navigation_start = driver.execute_script("return window.performance.timing.navigationStart") + load_complete = driver.execute_script("return window.performance.timing.loadEventEnd") + page_load_time = load_complete - navigation_start + + print(f"Page load time: {page_load_time}ms") + + finally: + driver.quit() +``` + +### Web Application Testing with Nginx + +```python +from testcontainers.nginx import NginxContainer +import requests +import tempfile +import os + +# Create test HTML content +test_html = """ + + + + Test Page + + +
    <h1>Welcome to Test Site</h1>
    <p>This is a test page served by Nginx.</p>
+ + +""" + +# Create temporary directory with test content +with tempfile.TemporaryDirectory() as temp_dir: + # Write test HTML file + html_file = os.path.join(temp_dir, "index.html") + with open(html_file, "w") as f: + f.write(test_html) + + # Start Nginx container with custom content + nginx = NginxContainer("nginx:alpine") \ + .with_volume_mapping(temp_dir, "/usr/share/nginx/html", "ro") \ + .with_exposed_ports(80) + + with nginx: + # Get server URL + server_url = nginx.get_url() + + # Test static content serving + response = requests.get(server_url) + assert response.status_code == 200 + assert "Welcome to Test Site" in response.text + + # Test different HTTP methods + head_response = requests.head(server_url) + assert head_response.status_code == 200 + + # Test non-existent page + not_found = requests.get(f"{server_url}/nonexistent") + assert not_found.status_code == 404 + + print(f"Nginx serving content at: {server_url}") + print(f"Content length: {len(response.text)} bytes") +``` + +### Email Testing with Mailpit + +```python +from testcontainers.mailpit import MailpitContainer +import smtplib +from email.mime.text import MIMEText +from email.mime.multipart import MIMEMultipart +import requests + +with MailpitContainer() as mailpit: + # Get connection details + smtp_url = mailpit.get_smtp_connection_url() + web_url = mailpit.get_web_url() + + # Parse SMTP connection + smtp_host = smtp_url.split("://")[1].split(":")[0] + smtp_port = int(smtp_url.split(":")[2]) + + # Send test emails + with smtplib.SMTP(smtp_host, smtp_port) as server: + # Send plain text email + plain_msg = MIMEText("This is a plain text test email.") + plain_msg["Subject"] = "Plain Text Test" + plain_msg["From"] = "sender@example.com" + plain_msg["To"] = "recipient@example.com" + + server.send_message(plain_msg) + + # Send HTML email + html_msg = MIMEMultipart("alternative") + html_msg["Subject"] = "HTML Test Email" + html_msg["From"] = "sender@example.com" + html_msg["To"] = "recipient@example.com" + + html_content = """ + + +
    <h1>Test Email</h1>
    <p>This is an HTML test email.</p>
+ Click here + + + """ + + html_part = MIMEText(html_content, "html") + html_msg.attach(html_part) + + server.send_message(html_msg) + + # Check emails via web API + import time + time.sleep(1) # Wait for emails to be processed + + # Get emails via Mailpit API + api_response = requests.get(f"{web_url}/api/v1/messages") + emails = api_response.json() + + print(f"Received {len(emails['messages'])} emails") + for email in emails["messages"]: + print(f"- Subject: {email['Subject']}") + print(f" From: {email['From']['Address']}") + print(f" To: {email['To'][0]['Address']}") +``` + +### SFTP File Transfer Testing + +```python +from testcontainers.sftp import SftpContainer +import paramiko +import io + +with SftpContainer() as sftp: + connection_url = sftp.get_connection_url() + + # Parse connection details + host = connection_url.split("://")[1].split("@")[1].split(":")[0] + port = int(connection_url.split(":")[3]) + username = "testuser" + password = "testpass" + + # Create SSH client + ssh = paramiko.SSHClient() + ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) + + try: + # Connect to SFTP server + ssh.connect(hostname=host, port=port, username=username, password=password) + sftp_client = ssh.open_sftp() + + # Upload file + test_content = "Hello, SFTP!\nThis is a test file." + file_buffer = io.StringIO(test_content) + + with sftp_client.open("test_upload.txt", "w") as remote_file: + remote_file.write(test_content) + + # List files + files = sftp_client.listdir(".") + print(f"Files on SFTP server: {files}") + + # Download file + with sftp_client.open("test_upload.txt", "r") as remote_file: + downloaded_content = remote_file.read() + print(f"Downloaded content: {downloaded_content}") + + # Create directory and upload multiple files + sftp_client.mkdir("test_directory") + + for i in range(3): + filename = f"test_directory/file_{i}.txt" + content = f"Content of file {i}" + with sftp_client.open(filename, "w") as remote_file: + remote_file.write(content) + + # List directory contents + dir_files = sftp_client.listdir("test_directory") + print(f"Files in test_directory: {dir_files}") + + finally: + sftp_client.close() + ssh.close() +``` + +### Complete Web Application Testing Stack + +```python +from testcontainers.selenium import BrowserWebDriverContainer +from testcontainers.nginx import NginxContainer +from testcontainers.mailpit import MailpitContainer +from testcontainers.postgres import PostgresContainer +from testcontainers.core.network import Network +import tempfile +import os + +# Create test web application +app_html = """ + + + + Test App + + + +
    <h1>Test Application</h1>
    <form onsubmit="document.getElementById('result').textContent = 'Form submitted successfully!'; return false;">
        <input type="email" id="email" name="email">
        <button type="submit">Submit</button>
    </form>
    <div id="result"></div>
+ + +""" + +with tempfile.TemporaryDirectory() as temp_dir: + # Create test HTML + html_file = os.path.join(temp_dir, "index.html") + with open(html_file, "w") as f: + f.write(app_html) + + # Create network for services + with Network() as network: + # Start all services + with NginxContainer() as web_server, \ + MailpitContainer() as email_server, \ + PostgresContainer("postgres:13") as database, \ + BrowserWebDriverContainer({"browserName": "chrome"}) as browser: + + # Configure web server + web_server.with_volume_mapping(temp_dir, "/usr/share/nginx/html", "ro") + web_server.with_network(network).with_network_aliases("web") + + # Configure other services + email_server.with_network(network).with_network_aliases("mail") + database.with_network(network).with_network_aliases("db") + browser.with_network(network) + + # Get service URLs + web_url = web_server.get_url() + mail_web_url = email_server.get_web_url() + db_url = database.get_connection_url() + + print(f"Web server: {web_url}") + print(f"Mail server: {mail_web_url}") + print(f"Database: {db_url}") + + # Automated testing + driver = browser.get_driver() + + try: + # Test web application + driver.get(web_url) + + # Fill form + email_input = driver.find_element("id", "email") + email_input.send_keys("test@example.com") + + # Submit form + submit_button = driver.find_element("css selector", "button[type='submit']") + submit_button.click() + + # Verify result + from selenium.webdriver.support.ui import WebDriverWait + from selenium.webdriver.support import expected_conditions as EC + from selenium.webdriver.common.by import By + + wait = WebDriverWait(driver, 10) + result_element = wait.until( + EC.text_to_be_present_in_element((By.ID, "result"), "Form submitted successfully!") + ) + + print("✓ Web application test passed") + + # Take screenshot of success + driver.save_screenshot("test_success.png") + + finally: + driver.quit() + + print("✓ Complete web application testing stack verified") +``` + +### Performance Testing Setup + +```python +from testcontainers.nginx import NginxContainer +import requests +import time +import concurrent.futures +import statistics + +def performance_test(url, num_requests=100, concurrent_users=10): + """Run performance test against web server.""" + + def make_request(): + start_time = time.time() + try: + response = requests.get(url, timeout=10) + end_time = time.time() + return { + "status_code": response.status_code, + "response_time": end_time - start_time, + "success": response.status_code == 200 + } + except Exception as e: + return { + "status_code": 0, + "response_time": 0, + "success": False, + "error": str(e) + } + + # Run concurrent requests + results = [] + with concurrent.futures.ThreadPoolExecutor(max_workers=concurrent_users) as executor: + futures = [executor.submit(make_request) for _ in range(num_requests)] + results = [future.result() for future in concurrent.futures.as_completed(futures)] + + # Calculate statistics + successful_requests = [r for r in results if r["success"]] + response_times = [r["response_time"] for r in successful_requests] + + if response_times: + stats = { + "total_requests": num_requests, + "successful_requests": len(successful_requests), + "success_rate": len(successful_requests) / num_requests * 100, + "avg_response_time": statistics.mean(response_times), + "min_response_time": min(response_times), + "max_response_time": max(response_times), + "median_response_time": statistics.median(response_times) + } + else: + stats = {"error": "No successful requests"} + + 
return stats + +# Run performance test +with NginxContainer() as nginx: + server_url = nginx.get_url() + + print(f"Running performance test against: {server_url}") + results = performance_test(server_url, num_requests=50, concurrent_users=5) + + print("\nPerformance Test Results:") + for key, value in results.items(): + if isinstance(value, float): + print(f"{key}: {value:.4f}") + else: + print(f"{key}: {value}") +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-testcontainers/tile.json b/.tessl/tiles/tessl/pypi-testcontainers/tile.json new file mode 100644 index 0000000..a807364 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-testcontainers/tile.json @@ -0,0 +1,7 @@ +{ + "name": "tessl/pypi-testcontainers", + "version": "4.12.0", + "docs": "docs/index.md", + "describes": "pkg:pypi/testcontainers@4.12.0", + "summary": "Python library for throwaway instances of anything that can run in a Docker container" +} \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-torch/docs/advanced-features.md b/.tessl/tiles/tessl/pypi-torch/docs/advanced-features.md new file mode 100644 index 0000000..92fc377 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-torch/docs/advanced-features.md @@ -0,0 +1,753 @@ +# Advanced Features + +JIT compilation, model export, graph transformations, quantization, and deployment utilities for optimizing and deploying PyTorch models in production environments. + +## Capabilities + +### JIT Compilation (torch.jit) + +TorchScript compilation for model optimization and deployment. + +```python { .api } +def jit.script(obj, optimize=None, _frames_up=0, _rcb=None): + """ + Compile Python code to TorchScript. + + Parameters: + - obj: Function, method, or class to compile + - optimize: Whether to apply optimizations + + Returns: + ScriptModule or ScriptFunction + """ + +def jit.trace(func, example_inputs, optimize=None, check_trace=True, check_inputs=None, check_tolerance=1e-5, strict=True, _force_outplace=False, _module_class=None, _compilation_unit=None): + """ + Trace function execution to create TorchScript. + + Parameters: + - func: Function or module to trace + - example_inputs: Example inputs for tracing + - optimize: Whether to apply optimizations + - check_trace: Whether to verify trace correctness + - strict: Whether to record all operations + + Returns: + TracedModule or function + """ + +def jit.load(f, map_location=None, _extra_files=None): + """Load TorchScript model from file.""" + +def jit.save(m, f, _extra_files=None): + """Save TorchScript model to file.""" + +class jit.ScriptModule(nn.Module): + """TorchScript compiled module.""" + def save(self, f, _extra_files=None): ... + def code(self) -> str: ... + def graph(self): ... + def code_with_constants(self) -> Tuple[str, List[Tensor]]: ... + +def jit.freeze(mod, preserved_attrs=None, optimize_numerics=True): + """Freeze TorchScript module for inference.""" + +def jit.optimize_for_inference(mod, other_methods=None): + """Optimize TorchScript module for inference.""" + +def jit.enable_onednn_fusion(enabled: bool): + """Enable/disable OneDNN fusion optimization.""" + +def jit.set_fusion_strategy(strategy: List[Tuple[str, bool]]): + """Set fusion strategy for optimization.""" +``` + +### Model Export (torch.export) + +Export PyTorch models for deployment and optimization. + +```python { .api } +def export.export(mod: nn.Module, args, kwargs=None, *, dynamic_shapes=None, strict=True) -> ExportedProgram: + """ + Export PyTorch module to exportable format. 
+ + Parameters: + - mod: Module to export + - args: Example arguments + - kwargs: Example keyword arguments + - dynamic_shapes: Dynamic shape specifications + - strict: Whether to enforce strict export + + Returns: + ExportedProgram + """ + +class export.ExportedProgram: + """Exported PyTorch program.""" + def module(self) -> nn.Module: ... + def graph_module(self): ... + def graph_signature(self): ... + def call_spec(self): ... + def verifier(self): ... + def state_dict(self) -> Dict[str, Any]: ... + def named_parameters(self): ... + def named_buffers(self): ... + +def export.save(ep: ExportedProgram, f) -> None: + """Save exported program to file.""" + +def export.load(f) -> ExportedProgram: + """Load exported program from file.""" +``` + +### Model Compilation (torch.compile) + +Compile PyTorch models for performance optimization. + +```python { .api } +def compile(model=None, *, fullgraph=False, dynamic=None, backend="inductor", mode=None, options=None, disable=False): + """ + Compile PyTorch model for optimization. + + Parameters: + - model: Model to compile (or use as decorator) + - fullgraph: Whether to compile the entire graph + - dynamic: Enable dynamic shapes + - backend: Compilation backend ("inductor", "aot_eager", etc.) + - mode: Compilation mode ("default", "reduce-overhead", "max-autotune") + - options: Backend-specific options + - disable: Disable compilation + + Returns: + Compiled model + """ + +@compile +def compiled_function(x): + """Example of function compilation.""" + return x * 2 + 1 + +# Alternative usage +compiled_model = torch.compile(model, mode="max-autotune") +``` + +### Graph Transformations (torch.fx) + +Symbolic tracing and graph manipulation for model analysis and optimization. + +```python { .api } +class fx.GraphModule(nn.Module): + """Module with FX graph representation.""" + def __init__(self, root, graph, class_name='GraphModule'): ... + def recompile(self): ... + def code(self) -> str: ... + def graph(self): ... + def print_readable(self, print_output=True): ... + +def fx.symbolic_trace(root, concrete_args=None, meta_args=None, _force_outplace=False) -> GraphModule: + """ + Symbolically trace PyTorch module. + + Parameters: + - root: Module or function to trace + - concrete_args: Arguments to keep concrete + - meta_args: Meta tensor arguments + + Returns: + GraphModule with traced computation graph + """ + +class fx.Tracer: + """Tracer for symbolic execution.""" + def trace(self, root, concrete_args=None): ... + def call_module(self, m, forward, args, kwargs): ... + def call_function(self, target, args, kwargs): ... + def call_method(self, target, args, kwargs): ... + +class fx.Graph: + """Computational graph representation.""" + def nodes(self): ... + def create_node(self, op, target, args=None, kwargs=None, name=None, type_expr=None): ... + def erase_node(self, to_erase): ... + def inserting_before(self, n): ... + def inserting_after(self, n): ... + def lint(self): ... + def print_tabular(self): ... + +class fx.Node: + """Node in FX graph.""" + def replace_all_uses_with(self, replace_with): ... + def replace_input_with(self, old_input, new_input): ... + def append(self, x): ... + def prepend(self, x): ... + +def fx.replace_pattern(gm: GraphModule, pattern, replacement) -> List[Match]: + """Replace patterns in graph.""" + +class fx.Interpreter: + """Base class for FX graph interpreters.""" + def run(self, *args, **kwargs): ... + def run_node(self, n): ... + def call_function(self, target, args, kwargs): ... 
+ def call_method(self, target, args, kwargs): ... + def call_module(self, target, args, kwargs): ... +``` + +### Quantization (torch.quantization) + +Model quantization for efficient deployment. + +```python { .api } +def quantization.quantize_dynamic(model, qconfig_spec=None, dtype=torch.qint8, mapping=None, inplace=False, remove_qconfig=True): + """ + Dynamic quantization of model. + + Parameters: + - model: Model to quantize + - qconfig_spec: Quantization configuration + - dtype: Target quantized data type + - mapping: Custom op mapping + - inplace: Whether to modify model in-place + + Returns: + Quantized model + """ + +def quantization.quantize(model, run_fn, run_args, mapping=None, inplace=False): + """Post-training static quantization.""" + +def quantization.prepare(model, inplace=False, allow_list=None, observer_non_leaf_module_list=None, prepare_custom_config_dict=None): + """Prepare model for quantization aware training.""" + +def quantization.convert(model, mapping=None, inplace=False, remove_qconfig=True, convert_custom_config_dict=None): + """Convert prepared model to quantized version.""" + +def quantization.prepare_qat(model, mapping=None, inplace=False): + """Prepare model for quantization aware training.""" + +class quantization.QuantStub(nn.Module): + """Quantization stub for marking quantization points.""" + def __init__(self, qconfig=None): ... + def forward(self, x): ... + +class quantization.DeQuantStub(nn.Module): + """Dequantization stub for marking dequantization points.""" + def __init__(self): ... + def forward(self, x): ... + +class quantization.QConfig: + """Quantization configuration.""" + def __init__(self, activation, weight): ... + +def quantization.get_default_qconfig(backend='fbgemm'): + """Get default quantization configuration.""" + +def quantization.get_default_qat_qconfig(backend='fbgemm'): + """Get default QAT quantization configuration.""" + +class quantization.FakeQuantize(nn.Module): + """Fake quantization for QAT.""" + def __init__(self, observer=MinMaxObserver, quant_min=0, quant_max=255, **observer_kwargs): ... + def forward(self, X): ... + def calculate_qparams(self): ... +``` + +### ONNX Export (torch.onnx) + +Export PyTorch models to ONNX format for interoperability. + +```python { .api } +def onnx.export(model, args, f, export_params=True, verbose=False, training=TrainingMode.EVAL, + input_names=None, output_names=None, operator_export_type=OperatorExportTypes.ONNX, + opset_version=None, do_constant_folding=True, dynamic_axes=None, keep_initializers_as_inputs=None, + custom_opsets=None, enable_onnx_checker=True, use_external_data_format=False): + """ + Export PyTorch model to ONNX format. + + Parameters: + - model: PyTorch model to export + - args: Model input arguments + - f: File path or file-like object to save to + - export_params: Whether to export parameters + - verbose: Enable verbose output + - training: Training mode (EVAL, TRAINING, PRESERVE) + - input_names: Names for input nodes + - output_names: Names for output nodes + - opset_version: ONNX opset version + - dynamic_axes: Dynamic input/output axes + - custom_opsets: Custom operator sets + """ + +def onnx.dynamo_export(model, *model_args, export_options=None, **model_kwargs) -> ONNXProgram: + """Export using torch.export and Dynamo.""" + +class onnx.ONNXProgram: + """ONNX program representation.""" + def save(self, destination): ... + def model_proto(self): ... 
+ +def onnx.load(f) -> ModelProto: + """Load ONNX model.""" + +def onnx.save(model, f, export_params=True): + """Save ONNX model to file.""" + +class onnx.TrainingMode(Enum): + """Training mode for ONNX export.""" + EVAL = 0 + TRAINING = 1 + PRESERVE = 2 + +class onnx.OperatorExportTypes(Enum): + """Operator export types.""" + ONNX = 0 + ONNX_ATEN = 1 + ONNX_ATEN_FALLBACK = 2 +``` + +### Mobile Deployment (torch.utils.mobile_optimizer) + +Optimization utilities for mobile deployment. + +```python { .api } +def utils.mobile_optimizer.optimize_for_mobile(script_module, optimization_blocklist=None, preserved_methods=None, backend='CPU'): + """ + Optimize TorchScript module for mobile deployment. + + Parameters: + - script_module: TorchScript module to optimize + - optimization_blocklist: Operations to exclude from optimization + - preserved_methods: Methods to preserve during optimization + - backend: Target backend ('CPU', 'Vulkan', 'Metal') + + Returns: + Optimized TorchScript module + """ + +class utils.mobile_optimizer.LiteScriptModule: + """Lightweight script module for mobile.""" + def forward(self, *args): ... + def get_debug_info(self): ... +``` + +### TensorRT Integration + +NVIDIA TensorRT integration for GPU inference optimization. + +```python { .api } +def tensorrt.compile(model, inputs, enabled_precisions={torch.float}, workspace_size=1 << 22, + min_block_size=3, torch_executed_ops=None, torch_executed_modules=None): + """ + Compile model with TensorRT. + + Parameters: + - model: PyTorch model to compile + - inputs: Example inputs for compilation + - enabled_precisions: Allowed precision types + - workspace_size: TensorRT workspace size + - min_block_size: Minimum block size for TensorRT subgraphs + + Returns: + TensorRT compiled model + """ +``` + +### Automatic Mixed Precision (torch.amp) + +Automatic mixed precision training for performance and memory optimization. + +```python { .api } +class amp.GradScaler: + """Gradient scaler for mixed precision training.""" + def __init__(self, init_scale=2**16, growth_factor=2.0, backoff_factor=0.5, growth_interval=2000, enabled=True): + """ + Parameters: + - init_scale: Initial scale factor + - growth_factor: Scale growth factor + - backoff_factor: Scale reduction factor + - growth_interval: Steps between scale increases + - enabled: Whether scaler is enabled + """ + + def scale(self, outputs): ... + def step(self, optimizer): ... + def update(self): ... + def unscale_(self, optimizer): ... + def get_scale(self): ... + def get_growth_factor(self): ... + def set_growth_factor(self, new_factor): ... + def get_backoff_factor(self): ... + def set_backoff_factor(self, new_factor): ... + def get_growth_interval(self): ... + def set_growth_interval(self, new_interval): ... + def is_enabled(self): ... + def state_dict(self): ... + def load_state_dict(self, state_dict): ... + +def amp.autocast(device_type='cuda', dtype=None, enabled=True, cache_enabled=None): + """ + Context manager for automatic mixed precision. + + Parameters: + - device_type: Device type ('cuda', 'cpu', 'xpu') + - dtype: Target dtype (torch.float16, torch.bfloat16) + - enabled: Whether autocast is enabled + - cache_enabled: Whether to cache autocast state + """ +``` + +### Model Optimization and Pruning (torch.ao) + +Advanced optimization techniques including pruning and sparsity. + +```python { .api } +def ao.pruning.prune_low_magnitude(model, amount, importance_scores=None, structured=False, dim=None): + """ + Prune model by removing low magnitude weights. 
+ + Parameters: + - model: Model to prune + - amount: Fraction of weights to prune + - importance_scores: Custom importance scores + - structured: Whether to use structured pruning + - dim: Dimension for structured pruning + + Returns: + Pruned model + """ + +class ao.pruning.WeightNormSparsifier: + """Weight norm based sparsifier.""" + def __init__(self, sparsity_level=0.5): ... + def update_mask(self, module, tensor_name, **kwargs): ... + +class ao.quantization.QConfigMapping: + """Quantization configuration mapping.""" + def set_global(self, qconfig): ... + def set_object_type(self, object_type, qconfig): ... + def set_module_name(self, module_name, qconfig): ... + +def ao.quantization.get_default_qconfig_mapping(backend='x86'): + """Get default quantization configuration mapping.""" + +class ao.quantization.FusedMovingAvgObsFakeQuantize(nn.Module): + """Fused moving average observer fake quantize.""" + def __init__(self, observer=MovingAverageMinMaxObserver, **observer_kwargs): ... +``` + +## Usage Examples + +### TorchScript Compilation + +```python +import torch +import torch.nn as nn + +# Define model +class SimpleModel(nn.Module): + def __init__(self): + super(SimpleModel, self).__init__() + self.linear = nn.Linear(10, 5) + + def forward(self, x): + return torch.relu(self.linear(x)) + +model = SimpleModel() +model.eval() + +# Script compilation +scripted_model = torch.jit.script(model) +print(scripted_model.code) + +# Trace compilation +example_input = torch.randn(1, 10) +traced_model = torch.jit.trace(model, example_input) + +# Save/load +torch.jit.save(scripted_model, 'model_scripted.pt') +loaded_model = torch.jit.load('model_scripted.pt') + +# Optimization for inference +optimized_model = torch.jit.optimize_for_inference(scripted_model) + +print("TorchScript compilation completed") +``` + +### Model Export and Deployment + +```python +import torch +import torch.nn as nn +from torch.export import export + +# Define model +class ExportModel(nn.Module): + def __init__(self): + super().__init__() + self.conv = nn.Conv2d(3, 16, 3, padding=1) + self.pool = nn.AdaptiveAvgPool2d((1, 1)) + self.fc = nn.Linear(16, 10) + + def forward(self, x): + x = torch.relu(self.conv(x)) + x = self.pool(x) + x = x.flatten(1) + return self.fc(x) + +model = ExportModel() +example_input = torch.randn(1, 3, 32, 32) + +# Export to ExportedProgram +exported_program = export(model, (example_input,)) + +# Save exported program +torch.export.save(exported_program, 'exported_model.pt2') + +# Load exported program +loaded_program = torch.export.load('exported_model.pt2') + +# Use exported program +output = loaded_program.module()(example_input) +print(f"Export completed, output shape: {output.shape}") +``` + +### Torch Compile Usage + +```python +import torch +import torch.nn as nn + +# Define model +model = nn.Sequential( + nn.Linear(100, 200), + nn.ReLU(), + nn.Linear(200, 100), + nn.ReLU(), + nn.Linear(100, 10) +) + +# Compile with different modes +default_compiled = torch.compile(model) +fast_compiled = torch.compile(model, mode="reduce-overhead") +optimal_compiled = torch.compile(model, mode="max-autotune") + +# Use as decorator +@torch.compile +def custom_function(x, y): + return x.matmul(y) + x.sum() + +# Example usage +x = torch.randn(32, 100) +y = torch.randn(100, 50) + +# Compiled function +result = custom_function(x, y) + +# Compiled model +output = optimal_compiled(x) + +print(f"Torch compile completed, output shape: {output.shape}") +``` + +### Quantization Example + +```python +import torch +import 
torch.nn as nn +import torch.quantization as quant + +# Define model +class QuantModel(nn.Module): + def __init__(self): + super().__init__() + self.quant = quant.QuantStub() + self.conv1 = nn.Conv2d(3, 32, 3, padding=1) + self.relu1 = nn.ReLU() + self.conv2 = nn.Conv2d(32, 64, 3, padding=1) + self.relu2 = nn.ReLU() + self.pool = nn.AdaptiveAvgPool2d((1, 1)) + self.fc = nn.Linear(64, 10) + self.dequant = quant.DeQuantStub() + + def forward(self, x): + x = self.quant(x) + x = self.relu1(self.conv1(x)) + x = self.relu2(self.conv2(x)) + x = self.pool(x) + x = x.flatten(1) + x = self.fc(x) + x = self.dequant(x) + return x + +model = QuantModel() +model.eval() + +# Dynamic quantization +quantized_model = quant.quantize_dynamic( + model, {nn.Linear}, dtype=torch.qint8 +) + +# Post-training static quantization +model.qconfig = quant.get_default_qconfig('fbgemm') +prepared_model = quant.prepare(model) + +# Calibration (example data) +for _ in range(10): + calibration_data = torch.randn(1, 3, 32, 32) + prepared_model(calibration_data) + +# Convert to quantized model +quantized_static_model = quant.convert(prepared_model) + +print("Quantization completed") +print(f"Original model size: {sum(p.numel() for p in model.parameters())}") +print(f"Quantized model parameters: {sum(p.numel() for p in quantized_model.parameters())}") +``` + +### ONNX Export + +```python +import torch +import torch.nn as nn +import torch.onnx + +# Define model +class ONNXModel(nn.Module): + def __init__(self): + super().__init__() + self.backbone = nn.Sequential( + nn.Conv2d(3, 64, 7, stride=2, padding=3), + nn.BatchNorm2d(64), + nn.ReLU(inplace=True), + nn.AdaptiveAvgPool2d((1, 1)), + nn.Flatten(), + nn.Linear(64, 1000) + ) + + def forward(self, x): + return self.backbone(x) + +model = ONNXModel() +model.eval() + +# Example input +dummy_input = torch.randn(1, 3, 224, 224) + +# Export to ONNX +torch.onnx.export( + model, + dummy_input, + "model.onnx", + export_params=True, + opset_version=11, + do_constant_folding=True, + input_names=['input'], + output_names=['output'], + dynamic_axes={ + 'input': {0: 'batch_size'}, + 'output': {0: 'batch_size'} + } +) + +print("ONNX export completed") +``` + +### FX Graph Manipulation + +```python +import torch +import torch.nn as nn +import torch.fx as fx + +# Define model +class FXModel(nn.Module): + def __init__(self): + super().__init__() + self.conv1 = nn.Conv2d(3, 32, 3) + self.conv2 = nn.Conv2d(32, 64, 3) + self.relu = nn.ReLU() + self.pool = nn.AdaptiveAvgPool2d((1, 1)) + self.fc = nn.Linear(64, 10) + + def forward(self, x): + x = self.relu(self.conv1(x)) + x = self.relu(self.conv2(x)) + x = self.pool(x) + x = x.flatten(1) + x = self.fc(x) + return x + +# Symbolic tracing +model = FXModel() +traced = fx.symbolic_trace(model) + +# Print graph +print("Original graph:") +traced.graph.print_tabular() + +# Graph manipulation - replace ReLU with GELU +for node in traced.graph.nodes: + if node.target == torch.relu: + with traced.graph.inserting_after(node): + new_node = traced.graph.call_function(torch.nn.functional.gelu, args=(node.args[0],)) + node.replace_all_uses_with(new_node) + traced.graph.erase_node(node) + +# Recompile +traced.recompile() + +print("\nModified graph:") +traced.graph.print_tabular() + +# Test modified model +test_input = torch.randn(1, 3, 32, 32) +output = traced(test_input) +print(f"FX transformation completed, output shape: {output.shape}") +``` + +### Mixed Precision Training + +```python +import torch +import torch.nn as nn +import torch.optim as optim +from 
torch.cuda.amp import autocast, GradScaler + +# Define model and training setup +model = nn.Sequential( + nn.Linear(1000, 500), + nn.ReLU(), + nn.Linear(500, 100), + nn.ReLU(), + nn.Linear(100, 10) +).cuda() + +optimizer = optim.Adam(model.parameters(), lr=0.001) +criterion = nn.CrossEntropyLoss() +scaler = GradScaler() + +# Training loop with mixed precision +model.train() +for epoch in range(5): + for batch_idx in range(100): # Simulate 100 batches + # Generate dummy data + data = torch.randn(32, 1000).cuda() + targets = torch.randint(0, 10, (32,)).cuda() + + optimizer.zero_grad() + + # Forward pass with autocast + with autocast(): + outputs = model(data) + loss = criterion(outputs, targets) + + # Backward pass with gradient scaling + scaler.scale(loss).backward() + scaler.step(optimizer) + scaler.update() + + if batch_idx % 25 == 0: + print(f"Epoch {epoch}, Batch {batch_idx}, Loss: {loss.item():.4f}, Scale: {scaler.get_scale()}") + +print("Mixed precision training completed") +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-torch/docs/devices-distributed.md b/.tessl/tiles/tessl/pypi-torch/docs/devices-distributed.md new file mode 100644 index 0000000..c458405 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-torch/docs/devices-distributed.md @@ -0,0 +1,674 @@ +# Device and Distributed Computing + +Device management, CUDA operations, distributed training, and multi-GPU support for scaling deep learning workloads across different hardware platforms including CPU, CUDA, MPS, and XPU. + +## Capabilities + +### Device Management + +Core device detection, selection, and management functions. + +```python { .api } +class device: + """Device specification for tensor placement.""" + def __init__(self, device_string: str): ... + def __str__(self) -> str: ... + def __repr__(self) -> str: ... + +def get_default_device() -> device: + """Get the default device for new tensors.""" + +def set_default_device(device) -> None: + """Set the default device for new tensors.""" + +def get_device(tensor_or_device) -> device: + """Get device of tensor or validate device specification.""" +``` + +### CUDA Operations (torch.cuda) + +CUDA device management and GPU acceleration functions. + +```python { .api } +def cuda.is_available() -> bool: + """Check if CUDA is available.""" + +def cuda.device_count() -> int: + """Number of available CUDA devices.""" + +def cuda.get_device_name(device=None) -> str: + """Get name of CUDA device.""" + +def cuda.get_device_properties(device) -> _CudaDeviceProperties: + """Get properties of CUDA device.""" + +def cuda.get_device_capability(device=None) -> Tuple[int, int]: + """Get compute capability of device.""" + +def cuda.current_device() -> int: + """Get current CUDA device index.""" + +def cuda.set_device(device) -> None: + """Set current CUDA device.""" + +def cuda.device(device) -> ContextManager: + """Context manager for device selection.""" + +def cuda.stream(stream=None) -> ContextManager: + """Context manager for CUDA stream selection.""" + +def cuda.synchronize(device=None) -> None: + """Synchronize all kernels on device.""" + +def cuda.is_initialized() -> bool: + """Check if CUDA is initialized.""" + +def cuda.init() -> None: + """Initialize CUDA.""" +``` + +### CUDA Memory Management + +GPU memory allocation, caching, and profiling. 
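A minimal sketch of inspecting allocator state around a workload (assumes a CUDA device is present; this is an illustrative usage pattern, not part of the reference below):

```python
import torch

if torch.cuda.is_available():
    x = torch.randn(4096, 4096, device="cuda")
    y = x @ x
    torch.cuda.synchronize()                      # finish pending kernels before reading stats
    print(f"allocated: {torch.cuda.memory_allocated() / 2**20:.1f} MiB")
    print(f"reserved:  {torch.cuda.memory_reserved() / 2**20:.1f} MiB")
    print(f"peak:      {torch.cuda.max_memory_allocated() / 2**20:.1f} MiB")
    del x, y
    torch.cuda.empty_cache()                      # hand cached blocks back to the driver
```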
+ +```python { .api } +def cuda.empty_cache() -> None: + """Free unused cached memory.""" + +def cuda.memory_allocated(device=None) -> int: + """Get currently allocated memory in bytes.""" + +def cuda.max_memory_allocated(device=None) -> int: + """Get peak allocated memory in bytes.""" + +def cuda.memory_reserved(device=None) -> int: + """Get currently reserved memory in bytes.""" + +def cuda.max_memory_reserved(device=None) -> int: + """Get peak reserved memory in bytes.""" + +def cuda.memory_cached(device=None) -> int: + """Get currently cached memory in bytes.""" + +def cuda.max_memory_cached(device=None) -> int: + """Get peak cached memory in bytes.""" + +def cuda.reset_max_memory_allocated(device=None) -> None: + """Reset peak memory stats.""" + +def cuda.reset_max_memory_cached(device=None) -> None: + """Reset peak cache stats.""" + +def cuda.memory_stats(device=None) -> Dict[str, Any]: + """Get comprehensive memory statistics.""" + +def cuda.memory_summary(device=None, abbreviated=False) -> str: + """Get human-readable memory summary.""" + +def cuda.memory_snapshot() -> List[Dict[str, Any]]: + """Get detailed memory snapshot.""" + +def cuda.set_per_process_memory_fraction(fraction: float, device=None) -> None: + """Set memory fraction for process.""" + +def cuda.get_per_process_memory_fraction(device=None) -> float: + """Get memory fraction for process.""" +``` + +### CUDA Streams and Events + +Asynchronous execution control for GPU operations. + +```python { .api } +class cuda.Stream: + """CUDA stream for asynchronous operations.""" + def __init__(self, device=None, priority=0): ... + def wait_event(self, event): ... + def wait_stream(self, stream): ... + def record_event(self, event=None): ... + def query(self) -> bool: ... + def synchronize(self): ... + +class cuda.Event: + """CUDA event for synchronization.""" + def __init__(self, enable_timing=False, blocking=False, interprocess=False): ... + def record(self, stream=None): ... + def wait(self, stream=None): ... + def query(self) -> bool: ... + def synchronize(self): ... + def elapsed_time(self, event) -> float: ... + +def cuda.current_stream(device=None) -> cuda.Stream: + """Get current CUDA stream.""" + +def cuda.default_stream(device=None) -> cuda.Stream: + """Get default CUDA stream.""" + +def cuda.set_stream(stream) -> None: + """Set current CUDA stream.""" +``` + +### CUDA Random Number Generation + +GPU random number generation functions. + +```python { .api } +def cuda.manual_seed(seed: int) -> None: + """Set CUDA random seed.""" + +def cuda.manual_seed_all(seed: int) -> None: + """Set CUDA random seed for all devices.""" + +def cuda.seed() -> None: + """Generate random CUDA seed.""" + +def cuda.seed_all() -> None: + """Generate random CUDA seed for all devices.""" + +def cuda.initial_seed() -> int: + """Get initial CUDA random seed.""" + +def cuda.get_rng_state(device='cuda') -> Tensor: + """Get CUDA random number generator state.""" + +def cuda.get_rng_state_all() -> List[Tensor]: + """Get CUDA RNG state for all devices.""" + +def cuda.set_rng_state(new_state: Tensor, device='cuda') -> None: + """Set CUDA random number generator state.""" + +def cuda.set_rng_state_all(new_states: List[Tensor]) -> None: + """Set CUDA RNG state for all devices.""" +``` + +### MPS Operations (torch.mps) + +Metal Performance Shaders for Apple Silicon GPU acceleration. 
+ +```python { .api } +def mps.is_available() -> bool: + """Check if MPS is available.""" + +def mps.is_built() -> bool: + """Check if PyTorch was built with MPS support.""" + +def mps.get_default_generator() -> Generator: + """Get default MPS random number generator.""" + +def mps.manual_seed(seed: int) -> None: + """Set MPS random seed.""" + +def mps.seed() -> None: + """Generate random MPS seed.""" + +def mps.synchronize() -> None: + """Synchronize MPS operations.""" + +def mps.empty_cache() -> None: + """Free unused MPS memory.""" + +def mps.set_per_process_memory_fraction(fraction: float) -> None: + """Set MPS memory fraction.""" + +class mps.Event: + """MPS event for synchronization.""" + def __init__(self): ... + def query(self) -> bool: ... + def synchronize(self): ... + def wait(self): ... +``` + +### XPU Operations (torch.xpu) + +Intel XPU backend support for Intel GPUs. + +```python { .api } +def xpu.is_available() -> bool: + """Check if XPU is available.""" + +def xpu.device_count() -> int: + """Number of available XPU devices.""" + +def xpu.get_device_name(device=None) -> str: + """Get name of XPU device.""" + +def xpu.current_device() -> int: + """Get current XPU device index.""" + +def xpu.set_device(device) -> None: + """Set current XPU device.""" + +def xpu.synchronize(device=None) -> None: + """Synchronize XPU operations.""" + +def xpu.empty_cache() -> None: + """Free unused XPU memory.""" +``` + +### Distributed Computing (torch.distributed) + +Distributed training and multi-process communication. + +```python { .api } +def distributed.init_process_group(backend: str, init_method=None, timeout=default_pg_timeout, + world_size=-1, rank=-1, store=None, group_name='', pg_options=None) -> None: + """Initialize distributed process group.""" + +def distributed.destroy_process_group(group=None) -> None: + """Destroy process group.""" + +def distributed.get_rank(group=None) -> int: + """Get rank of current process.""" + +def distributed.get_world_size(group=None) -> int: + """Get number of processes in group.""" + +def distributed.is_available() -> bool: + """Check if distributed package is available.""" + +def distributed.is_initialized() -> bool: + """Check if distributed process group is initialized.""" + +def distributed.is_mpi_available() -> bool: + """Check if MPI backend is available.""" + +def distributed.is_nccl_available() -> bool: + """Check if NCCL backend is available.""" + +def distributed.is_gloo_available() -> bool: + """Check if Gloo backend is available.""" + +def distributed.is_torchelastic_launched() -> bool: + """Check if launched with TorchElastic.""" + +def distributed.get_backend(group=None) -> str: + """Get backend of process group.""" + +def distributed.barrier(group=None, async_op=False) -> Optional[Work]: + """Synchronize all processes.""" +``` + +### Collective Communication Operations + +Distributed communication primitives for multi-GPU training. 
+ +```python { .api } +def distributed.broadcast(tensor: Tensor, src: int, group=None, async_op=False) -> Optional[Work]: + """Broadcast tensor from source to all processes.""" + +def distributed.all_reduce(tensor: Tensor, op=ReduceOp.SUM, group=None, async_op=False) -> Optional[Work]: + """Reduce tensor across all processes.""" + +def distributed.reduce(tensor: Tensor, dst: int, op=ReduceOp.SUM, group=None, async_op=False) -> Optional[Work]: + """Reduce tensor to destination process.""" + +def distributed.all_gather(tensor_list: List[Tensor], tensor: Tensor, group=None, async_op=False) -> Optional[Work]: + """Gather tensors from all processes.""" + +def distributed.gather(tensor: Tensor, gather_list=None, dst=0, group=None, async_op=False) -> Optional[Work]: + """Gather tensors to destination process.""" + +def distributed.scatter(tensor: Tensor, scatter_list=None, src=0, group=None, async_op=False) -> Optional[Work]: + """Scatter tensors from source process.""" + +def distributed.reduce_scatter(output: Tensor, input_list: List[Tensor], op=ReduceOp.SUM, group=None, async_op=False) -> Optional[Work]: + """Reduce and scatter tensors.""" + +def distributed.all_to_all(output_tensor_list: List[Tensor], input_tensor_list: List[Tensor], group=None, async_op=False) -> Optional[Work]: + """All-to-all communication.""" + +def distributed.send(tensor: Tensor, dst: int, group=None, tag=0) -> None: + """Send tensor to destination process.""" + +def distributed.recv(tensor: Tensor, src: int, group=None, tag=0) -> None: + """Receive tensor from source process.""" + +def distributed.isend(tensor: Tensor, dst: int, group=None, tag=0) -> Work: + """Non-blocking send.""" + +def distributed.irecv(tensor: Tensor, src: int, group=None, tag=0) -> Work: + """Non-blocking receive.""" +``` + +### Data Parallel Training + +Distributed data parallel training utilities. + +```python { .api } +class nn.DataParallel(Module): + """Data parallel wrapper for single-machine multi-GPU.""" + def __init__(self, module, device_ids=None, output_device=None, dim=0): ... + def forward(self, *inputs, **kwargs): ... + +class nn.parallel.DistributedDataParallel(Module): + """Distributed data parallel for multi-machine training.""" + def __init__(self, module, device_ids=None, output_device=None, dim=0, broadcast_buffers=True, + process_group=None, bucket_cap_mb=25, find_unused_parameters=False, + check_reduction=False, gradient_as_bucket_view=False): ... + def forward(self, *inputs, **kwargs): ... + def no_sync(self) -> ContextManager: ... +``` + +### Process Groups + +Advanced process group management for flexible distributed training. + +```python { .api } +class distributed.ProcessGroup: + """Process group for collective operations.""" + +def distributed.new_group(ranks=None, timeout=None, backend=None, pg_options=None) -> ProcessGroup: + """Create new process group.""" + +def distributed.new_subgroups(group_size=None, group=None, timeout=None, backend=None, pg_options=None) -> List[ProcessGroup]: + """Create subgroups.""" + +def distributed.new_subgroups_by_enumeration(ranks_per_subgroup_list, timeout=None, backend=None, pg_options=None) -> List[ProcessGroup]: + """Create subgroups by enumeration.""" +``` + +### Distributed Utilities + +Additional utilities for distributed training. 
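+
+The `Store` classes listed below act as a small shared key-value service and are most often used as a hand-rolled rendezvous when initializing a process group. A minimal sketch, assuming two processes run this same script with their own `RANK`; the host, port, and backend are illustrative values, not part of the API reference:
+
+```python
+import torch.distributed as dist
+
+# Illustrative values; in practice these come from your launcher or environment
+HOST, PORT, WORLD_SIZE, RANK = "127.0.0.1", 29500, 2, 0
+
+# Rank 0 hosts the store; the other ranks connect to it
+store = dist.TCPStore(HOST, PORT, WORLD_SIZE, is_master=(RANK == 0))
+
+# Simple cross-process key-value usage (get() returns bytes)
+store.set("status", "ready")
+print(store.get("status"))
+
+# The same store can back process group initialization directly
+dist.init_process_group("gloo", store=store, rank=RANK, world_size=WORLD_SIZE)
+dist.barrier()
+dist.destroy_process_group()
+```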
+ +```python { .api } +def distributed.get_process_group_ranks(group) -> List[int]: + """Get ranks in process group.""" + +def distributed.monitored_barrier(group=None, timeout=None, wait_all_ranks=False) -> None: + """Barrier with monitoring and timeout.""" + +class distributed.Store: + """Distributed key-value store.""" + def get(self, key: str) -> bytes: ... + def set(self, key: str, value: bytes): ... + def add(self, key: str, value: int) -> int: ... + def compare_set(self, key: str, expected_value: bytes, desired_value: bytes) -> bytes: ... + def wait(self, keys: List[str], timeout=None): ... + +class distributed.TCPStore(Store): + """TCP-based distributed store.""" + def __init__(self, host_name: str, port: int, world_size=None, is_master=False, timeout=None): ... + +class distributed.FileStore(Store): + """File-based distributed store.""" + def __init__(self, file_name: str, world_size=-1): ... + +class distributed.HashStore(Store): + """Hash-based distributed store.""" + def __init__(self): ... +``` + +## Usage Examples + +### Basic CUDA Operations + +```python +import torch + +# Check CUDA availability +if torch.cuda.is_available(): + print(f"CUDA devices: {torch.cuda.device_count()}") + print(f"Current device: {torch.cuda.current_device()}") + print(f"Device name: {torch.cuda.get_device_name()}") + + # Create tensors on GPU + device = torch.device('cuda') + x = torch.randn(1000, 1000, device=device) + y = torch.randn(1000, 1000, device=device) + + # GPU operations + z = torch.matmul(x, y) + + # Memory management + print(f"Allocated memory: {torch.cuda.memory_allocated() / 1e6:.1f} MB") + print(f"Cached memory: {torch.cuda.memory_reserved() / 1e6:.1f} MB") + + # Free unused memory + torch.cuda.empty_cache() + + # Move back to CPU + z_cpu = z.cpu() +else: + print("CUDA not available") +``` + +### Multi-GPU Data Parallel + +```python +import torch +import torch.nn as nn + +# Check for multiple GPUs +if torch.cuda.device_count() > 1: + print(f"Using {torch.cuda.device_count()} GPUs") + + # Define model + model = nn.Sequential( + nn.Linear(1000, 500), + nn.ReLU(), + nn.Linear(500, 100), + nn.ReLU(), + nn.Linear(100, 10) + ) + + # Wrap with DataParallel + model = nn.DataParallel(model) + model = model.cuda() + + # Create batch data + batch_size = 64 + x = torch.randn(batch_size, 1000).cuda() + + # Forward pass uses all available GPUs + output = model(x) + print(f"Output shape: {output.shape}") + print(f"Output device: {output.device}") +``` + +### CUDA Streams and Events + +```python +import torch +import time + +if torch.cuda.is_available(): + device = torch.device('cuda') + + # Create streams + stream1 = torch.cuda.Stream() + stream2 = torch.cuda.Stream() + + # Create events + start_event = torch.cuda.Event(enable_timing=True) + end_event = torch.cuda.Event(enable_timing=True) + + # Asynchronous operations + x = torch.randn(1000, 1000, device=device) + y = torch.randn(1000, 1000, device=device) + + # Record start time + start_event.record() + + # Operations on different streams + with torch.cuda.stream(stream1): + z1 = torch.matmul(x, y) + + with torch.cuda.stream(stream2): + z2 = torch.matmul(y, x) + + # Record end time + end_event.record() + + # Synchronize + torch.cuda.synchronize() + + # Get elapsed time + elapsed_time = start_event.elapsed_time(end_event) + print(f"Elapsed time: {elapsed_time:.2f} ms") +``` + +### Distributed Data Parallel Training + +```python +import torch +import torch.distributed as dist +import torch.nn as nn +import torch.optim as optim +from 
torch.nn.parallel import DistributedDataParallel as DDP +import os + +def setup(rank, world_size): + """Initialize distributed training.""" + os.environ['MASTER_ADDR'] = 'localhost' + os.environ['MASTER_PORT'] = '12355' + + # Initialize process group + dist.init_process_group("nccl", rank=rank, world_size=world_size) + torch.cuda.set_device(rank) + +def cleanup(): + """Clean up distributed training.""" + dist.destroy_process_group() + +def train_ddp(rank, world_size): + """Distributed training function.""" + setup(rank, world_size) + + # Create model and move to GPU + model = nn.Linear(100, 10).cuda(rank) + model = DDP(model, device_ids=[rank]) + + # Create optimizer + optimizer = optim.SGD(model.parameters(), lr=0.01) + + # Training loop + for epoch in range(10): + # Create dummy data + data = torch.randn(32, 100).cuda(rank) + targets = torch.randint(0, 10, (32,)).cuda(rank) + + # Forward pass + outputs = model(data) + loss = nn.CrossEntropyLoss()(outputs, targets) + + # Backward pass + optimizer.zero_grad() + loss.backward() + optimizer.step() + + if rank == 0: + print(f"Epoch {epoch}, Loss: {loss.item():.4f}") + + cleanup() + +# To run: python -m torch.distributed.launch --nproc_per_node=2 script.py +``` + +### Collective Communication + +```python +import torch +import torch.distributed as dist + +def collective_example(rank, world_size): + """Example of collective communication operations.""" + # Initialize + dist.init_process_group("nccl", rank=rank, world_size=world_size) + + device = torch.device(f'cuda:{rank}') + torch.cuda.set_device(device) + + # Create tensor on each process + tensor = torch.ones(2, 2).cuda() * rank + print(f"Rank {rank}: Before all_reduce: {tensor}") + + # All-reduce: sum across all processes + dist.all_reduce(tensor, op=dist.ReduceOp.SUM) + print(f"Rank {rank}: After all_reduce: {tensor}") + + # Broadcast from rank 0 + broadcast_tensor = torch.zeros(2, 2).cuda() + if rank == 0: + broadcast_tensor = torch.ones(2, 2).cuda() * 42 + + dist.broadcast(broadcast_tensor, src=0) + print(f"Rank {rank}: After broadcast: {broadcast_tensor}") + + # All-gather: collect tensors from all processes + tensor_list = [torch.zeros(2, 2).cuda() for _ in range(world_size)] + local_tensor = torch.ones(2, 2).cuda() * rank + dist.all_gather(tensor_list, local_tensor) + print(f"Rank {rank}: All gathered tensors: {tensor_list}") + + # Barrier synchronization + dist.barrier() + print(f"Rank {rank}: All processes synchronized") + + dist.destroy_process_group() +``` + +### MPS (Apple Silicon) Usage + +```python +import torch + +# Check MPS availability +if torch.mps.is_available(): + print("MPS is available") + device = torch.device('mps') + + # Create tensors on MPS + x = torch.randn(1000, 1000, device=device) + y = torch.randn(1000, 1000, device=device) + + # Perform operations + z = torch.matmul(x, y) + + # Synchronize MPS operations + torch.mps.synchronize() + + # Memory management + torch.mps.empty_cache() + + print(f"Computation completed on device: {z.device}") +else: + print("MPS not available, using CPU") + device = torch.device('cpu') +``` + +### Advanced Memory Management + +```python +import torch + +if torch.cuda.is_available(): + device = torch.device('cuda') + + # Set memory fraction + torch.cuda.set_per_process_memory_fraction(0.5) # Use only 50% of GPU memory + + # Memory profiling + torch.cuda.reset_max_memory_allocated() + torch.cuda.reset_max_memory_cached() + + # Allocate large tensors + tensors = [] + for i in range(10): + tensor = torch.randn(1000, 1000, 
device=device) + tensors.append(tensor) + + current_memory = torch.cuda.memory_allocated() / 1e6 + max_memory = torch.cuda.max_memory_allocated() / 1e6 + print(f"Iteration {i}: Current: {current_memory:.1f} MB, Peak: {max_memory:.1f} MB") + + # Memory summary + print(torch.cuda.memory_summary()) + + # Free memory + del tensors + torch.cuda.empty_cache() + + final_memory = torch.cuda.memory_allocated() / 1e6 + print(f"Memory after cleanup: {final_memory:.1f} MB") +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-torch/docs/index.md b/.tessl/tiles/tessl/pypi-torch/docs/index.md new file mode 100644 index 0000000..94147b8 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-torch/docs/index.md @@ -0,0 +1,238 @@ +# PyTorch + +PyTorch is a comprehensive deep learning framework that provides tensor computation with strong GPU acceleration and dynamic neural networks built on a tape-based autograd system. It offers a Python-first approach to machine learning, allowing researchers and developers to build and train neural networks using familiar Python syntax while maintaining high performance through optimized C++ and CUDA backends. + +## Package Information + +- **Package Name**: torch +- **Language**: Python +- **Installation**: `pip install torch` +- **GPU Support**: `pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118` + +## Core Imports + +```python +import torch +``` + +Common additional imports: + +```python +import torch.nn as nn +import torch.optim as optim +import torch.nn.functional as F +from torch.utils.data import DataLoader, Dataset +``` + +## Basic Usage + +```python +import torch +import torch.nn as nn +import torch.optim as optim + +# Create tensors +x = torch.tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True) +y = torch.tensor([[5.0], [6.0]]) + +# Define a simple neural network +class SimpleNet(nn.Module): + def __init__(self): + super(SimpleNet, self).__init__() + self.linear = nn.Linear(2, 1) + + def forward(self, x): + return self.linear(x) + +# Initialize model, loss function, and optimizer +model = SimpleNet() +criterion = nn.MSELoss() +optimizer = optim.SGD(model.parameters(), lr=0.01) + +# Forward pass +output = model(x) +loss = criterion(output, y) + +# Backward pass and optimization +optimizer.zero_grad() +loss.backward() +optimizer.step() + +print(f"Loss: {loss.item()}") +print(f"Gradients: {x.grad}") +``` + +## Architecture + +PyTorch's design centers around dynamic computation graphs and the autograd system: + +- **Tensors**: Multi-dimensional arrays with automatic differentiation support +- **Autograd**: Automatic differentiation engine that records operations for backpropagation +- **nn.Module**: Base class for neural network components with parameter management +- **Optimizers**: Algorithms for updating model parameters during training +- **Device Abstraction**: Unified interface for CPU, CUDA, MPS, and XPU backends +- **JIT Compilation**: TorchScript for optimizing models for deployment + +This architecture enables rapid prototyping in research while scaling to production deployments across various hardware platforms. + +## Capabilities + +### Core Tensor Operations + +Fundamental tensor creation, manipulation, and mathematical operations. Tensors are the primary data structure supporting automatic differentiation and GPU acceleration. + +```python { .api } +def tensor(data, *, dtype=None, device=None, requires_grad=False, pin_memory=False) -> Tensor: ... 
+def zeros(*size, dtype=None, device=None, requires_grad=False) -> Tensor: ... +def ones(*size, dtype=None, device=None, requires_grad=False) -> Tensor: ... +def rand(*size, dtype=None, device=None, requires_grad=False) -> Tensor: ... +def randn(*size, dtype=None, device=None, requires_grad=False) -> Tensor: ... +def arange(start=0, end, step=1, *, dtype=None, device=None, requires_grad=False) -> Tensor: ... +def linspace(start, end, steps, *, dtype=None, device=None, requires_grad=False) -> Tensor: ... +``` + +[Tensor Operations](./tensor-operations.md) + +### Neural Networks + +Complete neural network building blocks including layers, activation functions, loss functions, and containers for building deep learning models. + +```python { .api } +class Module: + def forward(self, *input): ... + def parameters(self, recurse=True): ... + def named_parameters(self, prefix='', recurse=True): ... + def zero_grad(self, set_to_none=False): ... + +class Linear(Module): + def __init__(self, in_features: int, out_features: int, bias: bool = True): ... + +class Conv2d(Module): + def __init__(self, in_channels: int, out_channels: int, kernel_size, stride=1, padding=0): ... + +class ReLU(Module): + def __init__(self, inplace: bool = False): ... + +class CrossEntropyLoss(Module): + def __init__(self, weight=None, size_average=None, ignore_index=-100): ... +``` + +[Neural Networks](./neural-networks.md) + +### Training and Optimization + +Optimizers, learning rate schedulers, and training utilities for model optimization and parameter updates. + +```python { .api } +class Optimizer: + def step(self, closure=None): ... + def zero_grad(self, set_to_none=False): ... + +class SGD(Optimizer): + def __init__(self, params, lr, momentum=0, dampening=0, weight_decay=0): ... + +class Adam(Optimizer): + def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0): ... + +class StepLR: + def __init__(self, optimizer, step_size, gamma=0.1): ... + def step(self, epoch=None): ... +``` + +[Training and Optimization](./training.md) + +### Mathematical Functions + +Comprehensive mathematical operations including linear algebra, FFT, special functions, and statistical operations. + +```python { .api } +def matmul(input: Tensor, other: Tensor) -> Tensor: ... +def dot(input: Tensor, other: Tensor) -> Tensor: ... +def sum(input: Tensor, dim=None, keepdim=False, *, dtype=None) -> Tensor: ... +def mean(input: Tensor, dim=None, keepdim=False, *, dtype=None) -> Tensor: ... +def std(input: Tensor, dim=None, keepdim=False, *, dtype=None) -> Tensor: ... +def max(input: Tensor, dim=None, keepdim=False) -> Tensor: ... +def min(input: Tensor, dim=None, keepdim=False) -> Tensor: ... +``` + +[Mathematical Functions](./mathematical-functions.md) + +### Device and Distributed Computing + +Device management, CUDA operations, distributed training, and multi-GPU support for scaling deep learning workloads. + +```python { .api } +def cuda.is_available() -> bool: ... +def cuda.device_count() -> int: ... +def cuda.get_device_name(device=None) -> str: ... +def cuda.set_device(device): ... + +class DistributedDataParallel(Module): + def __init__(self, module, device_ids=None, output_device=None): ... + +def distributed.init_process_group(backend, init_method=None, timeout=default_pg_timeout): ... +def distributed.all_reduce(tensor, op=ReduceOp.SUM, group=None, async_op=False): ... 
+``` + +[Device and Distributed Computing](./devices-distributed.md) + +### Advanced Features + +JIT compilation, model export, graph transformations, quantization, and deployment utilities for optimizing and deploying models. + +```python { .api } +def jit.script(obj, optimize=None, _frames_up=0, _rcb=None): ... +def jit.trace(func, example_inputs, optimize=None, check_trace=True): ... + +def export.export(mod: torch.nn.Module, args, kwargs=None, *, dynamic_shapes=None): ... + +def compile(model=None, *, fullgraph=False, dynamic=None, backend="inductor"): ... + +def quantization.quantize_dynamic(model, qconfig_spec=None, dtype=torch.qint8): ... +``` + +[Advanced Features](./advanced-features.md) + +## Core Types + +```python { .api } +class Tensor: + """Multi-dimensional array with automatic differentiation support.""" + def __init__(self, data, *, dtype=None, device=None, requires_grad=False): ... + def backward(self, gradient=None, retain_graph=None, create_graph=False): ... + def detach(self) -> Tensor: ... + def numpy(self) -> numpy.ndarray: ... + def cuda(self, device=None, non_blocking=False) -> Tensor: ... + def cpu(self) -> Tensor: ... + def to(self, *args, **kwargs) -> Tensor: ... + def size(self, dim=None): ... + def shape(self) -> torch.Size: ... + def dim(self) -> int: ... + def numel(self) -> int: ... + def item(self) -> number: ... + def clone(self) -> Tensor: ... + def requires_grad_(self, requires_grad=True) -> Tensor: ... + +class dtype: + """Data type specification for tensors.""" + float32: dtype + float64: dtype + int32: dtype + int64: dtype + bool: dtype + uint8: dtype + +class device: + """Device specification for tensor placement.""" + def __init__(self, device): ... + +class Size(tuple): + """Tensor shape representation.""" + def numel(self) -> int: ... + +class Generator: + """Random number generator state.""" + def manual_seed(self, seed: int) -> Generator: ... + def get_state(self) -> Tensor: ... + def set_state(self, new_state: Tensor) -> Generator: ... +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-torch/docs/mathematical-functions.md b/.tessl/tiles/tessl/pypi-torch/docs/mathematical-functions.md new file mode 100644 index 0000000..c2c8145 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-torch/docs/mathematical-functions.md @@ -0,0 +1,539 @@ +# Mathematical Functions + +Comprehensive mathematical operations including linear algebra, FFT, special functions, and statistical operations. PyTorch provides extensive mathematical functionality across multiple specialized modules. + +## Capabilities + +### Linear Algebra Operations (torch.linalg) + +Advanced linear algebra operations for matrices and tensors. 
+ +```python { .api } +def matmul(input: Tensor, other: Tensor) -> Tensor: + """Matrix multiplication supporting broadcasting.""" + +def solve(A: Tensor, B: Tensor) -> Tensor: + """Solve linear system AX = B.""" + +def inv(A: Tensor) -> Tensor: + """Matrix inverse.""" + +def pinv(A: Tensor, rcond=1e-15, hermitian=False) -> Tensor: + """Moore-Penrose pseudo-inverse.""" + +def det(A: Tensor) -> Tensor: + """Matrix determinant.""" + +def slogdet(A: Tensor) -> Tuple[Tensor, Tensor]: + """Sign and log-determinant.""" + +def norm(A: Tensor, ord=None, dim=None, keepdim=False, *, dtype=None) -> Tensor: + """Matrix or vector norm.""" + +def vector_norm(x: Tensor, ord=2, dim=None, keepdim=False, *, dtype=None) -> Tensor: + """Vector norm.""" + +def matrix_norm(A: Tensor, ord='fro', dim=(-2, -1), keepdim=False, *, dtype=None) -> Tensor: + """Matrix norm.""" + +def matrix_rank(A: Tensor, atol=None, rtol=None, hermitian=False) -> Tensor: + """Matrix rank.""" + +def cond(A: Tensor, p=None) -> Tensor: + """Matrix condition number.""" +``` + +### Matrix Decompositions + +Matrix factorization methods for numerical analysis. + +```python { .api } +def svd(A: Tensor, full_matrices=True) -> Tuple[Tensor, Tensor, Tensor]: + """Singular Value Decomposition.""" + +def svdvals(A: Tensor) -> Tensor: + """Singular values only.""" + +def eig(A: Tensor) -> Tuple[Tensor, Tensor]: + """Eigenvalue decomposition.""" + +def eigvals(A: Tensor) -> Tensor: + """Eigenvalues only.""" + +def eigh(A: Tensor, UPLO='L') -> Tuple[Tensor, Tensor]: + """Eigenvalue decomposition for Hermitian matrices.""" + +def eigvalsh(A: Tensor, UPLO='L') -> Tensor: + """Eigenvalues for Hermitian matrices.""" + +def qr(A: Tensor, mode='reduced') -> Tuple[Tensor, Tensor]: + """QR decomposition.""" + +def cholesky(A: Tensor, upper=False) -> Tensor: + """Cholesky decomposition.""" + +def cholesky_ex(A: Tensor, upper=False, check_errors=False) -> Tuple[Tensor, Tensor]: + """Cholesky decomposition with error checking.""" + +def lu_factor(A: Tensor, *, pivot=True) -> Tuple[Tensor, Tensor]: + """LU factorization.""" + +def lu_factor_ex(A: Tensor, *, pivot=True, check_errors=False) -> Tuple[Tensor, Tensor, Tensor]: + """LU factorization with error checking.""" +``` + +### Fast Fourier Transform (torch.fft) + +FFT operations for frequency domain analysis. 
+ +```python { .api } +def fft(input: Tensor, n=None, dim=-1, norm=None) -> Tensor: + """One-dimensional discrete Fourier transform.""" + +def ifft(input: Tensor, n=None, dim=-1, norm=None) -> Tensor: + """One-dimensional inverse discrete Fourier transform.""" + +def rfft(input: Tensor, n=None, dim=-1, norm=None) -> Tensor: + """One-dimensional real-to-complex FFT.""" + +def irfft(input: Tensor, n=None, dim=-1, norm=None) -> Tensor: + """One-dimensional complex-to-real inverse FFT.""" + +def fft2(input: Tensor, s=None, dim=(-2, -1), norm=None) -> Tensor: + """Two-dimensional discrete Fourier transform.""" + +def ifft2(input: Tensor, s=None, dim=(-2, -1), norm=None) -> Tensor: + """Two-dimensional inverse discrete Fourier transform.""" + +def rfft2(input: Tensor, s=None, dim=(-2, -1), norm=None) -> Tensor: + """Two-dimensional real-to-complex FFT.""" + +def irfft2(input: Tensor, s=None, dim=(-2, -1), norm=None) -> Tensor: + """Two-dimensional complex-to-real inverse FFT.""" + +def fftn(input: Tensor, s=None, dim=None, norm=None) -> Tensor: + """N-dimensional discrete Fourier transform.""" + +def ifftn(input: Tensor, s=None, dim=None, norm=None) -> Tensor: + """N-dimensional inverse discrete Fourier transform.""" + +def rfftn(input: Tensor, s=None, dim=None, norm=None) -> Tensor: + """N-dimensional real-to-complex FFT.""" + +def irfftn(input: Tensor, s=None, dim=None, norm=None) -> Tensor: + """N-dimensional complex-to-real inverse FFT.""" + +def fftfreq(n: int, d=1.0, *, dtype=None, device=None, requires_grad=False) -> Tensor: + """Discrete Fourier Transform sample frequencies.""" + +def rfftfreq(n: int, d=1.0, *, dtype=None, device=None, requires_grad=False) -> Tensor: + """Real-valued discrete Fourier Transform sample frequencies.""" + +def fftshift(input: Tensor, dim=None) -> Tensor: + """Shift zero-frequency component to center.""" + +def ifftshift(input: Tensor, dim=None) -> Tensor: + """Inverse of fftshift.""" +``` + +### Special Functions (torch.special) + +Special mathematical functions for advanced computations. 
+ +```python { .api } +def erf(input: Tensor) -> Tensor: + """Error function.""" + +def erfc(input: Tensor) -> Tensor: + """Complementary error function.""" + +def erfcx(input: Tensor) -> Tensor: + """Scaled complementary error function.""" + +def erfinv(input: Tensor) -> Tensor: + """Inverse error function.""" + +def digamma(input: Tensor) -> Tensor: + """Digamma function (logarithmic derivative of gamma).""" + +def gammaln(input: Tensor) -> Tensor: + """Log gamma function.""" + +def polygamma(n: int, input: Tensor) -> Tensor: + """Polygamma function.""" + +def multigammaln(input: Tensor, p: int) -> Tensor: + """Multivariate log gamma function.""" + +def gammainc(input: Tensor, other: Tensor) -> Tensor: + """Regularized lower incomplete gamma function.""" + +def gammaincc(input: Tensor, other: Tensor) -> Tensor: + """Regularized upper incomplete gamma function.""" + +def bessel_j0(input: Tensor) -> Tensor: + """Bessel function of the first kind of order 0.""" + +def bessel_j1(input: Tensor) -> Tensor: + """Bessel function of the first kind of order 1.""" + +def bessel_y0(input: Tensor) -> Tensor: + """Bessel function of the second kind of order 0.""" + +def bessel_y1(input: Tensor) -> Tensor: + """Bessel function of the second kind of order 1.""" + +def modified_bessel_i0(input: Tensor) -> Tensor: + """Modified Bessel function of the first kind of order 0.""" + +def modified_bessel_i1(input: Tensor) -> Tensor: + """Modified Bessel function of the first kind of order 1.""" + +def modified_bessel_k0(input: Tensor) -> Tensor: + """Modified Bessel function of the second kind of order 0.""" + +def modified_bessel_k1(input: Tensor) -> Tensor: + """Modified Bessel function of the second kind of order 1.""" + +def i0(input: Tensor) -> Tensor: + """Modified Bessel function of the first kind of order 0.""" + +def i0e(input: Tensor) -> Tensor: + """Exponentially scaled modified Bessel function of order 0.""" + +def ndtr(input: Tensor) -> Tensor: + """Standard normal cumulative distribution function.""" + +def ndtri(input: Tensor) -> Tensor: + """Inverse of standard normal cumulative distribution function.""" + +def log_ndtr(input: Tensor) -> Tensor: + """Log of standard normal cumulative distribution function.""" + +def expit(input: Tensor) -> Tensor: + """Expit function (sigmoid).""" + +def logit(input: Tensor, eps=None) -> Tensor: + """Logit function (inverse sigmoid).""" + +def xlogy(input: Tensor, other: Tensor) -> Tensor: + """Elementwise x * log(y).""" + +def xlog1py(input: Tensor, other: Tensor) -> Tensor: + """Elementwise x * log1p(y).""" + +def zeta(input: Tensor, other: Tensor) -> Tensor: + """Hurwitz zeta function.""" + +def logsumexp(input: Tensor, dim, keepdim=False) -> Tensor: + """Log of sum of exponentials.""" + +def softmax(input: Tensor, dim, dtype=None) -> Tensor: + """Softmax function.""" + +def log_softmax(input: Tensor, dim, dtype=None) -> Tensor: + """Log softmax function.""" +``` + +### Statistical Functions + +Statistical operations and probability distributions. 
+ +```python { .api } +def mean(input: Tensor, dim=None, keepdim=False, *, dtype=None) -> Tensor: + """Mean along specified dimensions.""" + +def median(input: Tensor, dim=None, keepdim=False) -> Tensor: + """Median along specified dimensions.""" + +def mode(input: Tensor, dim=None, keepdim=False) -> Tensor: + """Mode along specified dimensions.""" + +def std(input: Tensor, dim=None, unbiased=True, keepdim=False) -> Tensor: + """Standard deviation.""" + +def var(input: Tensor, dim=None, unbiased=True, keepdim=False) -> Tensor: + """Variance.""" + +def std_mean(input: Tensor, dim=None, unbiased=True, keepdim=False) -> Tuple[Tensor, Tensor]: + """Standard deviation and mean.""" + +def var_mean(input: Tensor, dim=None, unbiased=True, keepdim=False) -> Tuple[Tensor, Tensor]: + """Variance and mean.""" + +def cov(input: Tensor, *, correction=1, fweights=None, aweights=None) -> Tensor: + """Covariance matrix.""" + +def corrcoef(input: Tensor) -> Tensor: + """Correlation coefficient matrix.""" + +def bincount(input: Tensor, weights=None, minlength=0) -> Tensor: + """Count occurrences of each value.""" + +def histogram(input: Tensor, bins, *, range=None, weight=None, density=False) -> Tuple[Tensor, Tensor]: + """Compute histogram of tensor values.""" + +def histogramdd(input: Tensor, bins, *, range=None, weight=None, density=False) -> Tuple[Tensor, List[Tensor]]: + """Compute multidimensional histogram.""" +``` + +### Sparse Operations (torch.sparse) + +Operations for sparse tensors and matrices. + +```python { .api } +def sparse.mm(mat1: Tensor, mat2: Tensor) -> Tensor: + """Sparse matrix multiplication.""" + +def sparse.addmm(bias: Tensor, mat1: Tensor, mat2: Tensor, *, beta=1, alpha=1) -> Tensor: + """Sparse addmm operation.""" + +def sparse.sum(input: Tensor, dim=None, dtype=None) -> Tensor: + """Sum of sparse tensor elements.""" + +def sparse.softmax(input: Tensor, dim: int, *, dtype=None) -> Tensor: + """Sparse softmax.""" + +def sparse.log_softmax(input: Tensor, dim: int, *, dtype=None) -> Tensor: + """Sparse log softmax.""" + +def sparse.spsolve(A: Tensor, B: Tensor) -> Tensor: + """Solve sparse linear system.""" + +def sparse.sampled_addmm(bias: Tensor, input: Tensor, mat1: Tensor, mat2: Tensor, *, beta=1, alpha=1) -> Tensor: + """Sampled sparse matrix multiplication and addition.""" +``` + +### Random Sampling + +Random number generation and sampling functions. 
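+
+A short usage sketch of the seeding and sampling helpers listed below (the sampled values are illustrative):
+
+```python
+import torch
+
+# Reproducible sampling
+torch.manual_seed(42)
+
+coin_flips = torch.bernoulli(torch.full((5,), 0.3))   # 0/1 draws with p = 0.3
+counts = torch.poisson(torch.full((5,), 4.0))         # Poisson draws with rate 4
+
+# Draw class indices in proportion to (unnormalized) weights
+weights = torch.tensor([0.1, 0.3, 0.6])
+draws = torch.multinomial(weights, num_samples=10, replacement=True)
+
+# Save and restore generator state for exact reproducibility
+state = torch.get_rng_state()
+a = torch.rand(3)
+torch.set_rng_state(state)
+b = torch.rand(3)
+
+print(coin_flips, counts, draws)
+print(torch.equal(a, b))  # True
+```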
+ +```python { .api } +def manual_seed(seed: int): + """Set random seed for reproducibility.""" + +def initial_seed() -> int: + """Return initial random seed.""" + +def seed() -> int: + """Generate random seed.""" + +def get_rng_state() -> Tensor: + """Get random number generator state.""" + +def set_rng_state(new_state: Tensor): + """Set random number generator state.""" + +def bernoulli(input: Tensor, *, generator=None) -> Tensor: + """Sample from Bernoulli distribution.""" + +def poisson(input: Tensor, generator=None) -> Tensor: + """Sample from Poisson distribution.""" + +def normal(mean: float, std: float, size, *, dtype=None, device=None, requires_grad=False) -> Tensor: + """Sample from normal distribution.""" + +def uniform(low: float, high: float, size, *, dtype=None, device=None, requires_grad=False) -> Tensor: + """Sample from uniform distribution.""" + +def exponential(lambd: float, size, *, dtype=None, device=None, requires_grad=False) -> Tensor: + """Sample from exponential distribution.""" + +def geometric(p: float, size, *, dtype=None, device=None, requires_grad=False) -> Tensor: + """Sample from geometric distribution.""" + +def multinomial(input: Tensor, num_samples: int, replacement=False, *, generator=None) -> Tensor: + """Sample from multinomial distribution.""" +``` + +## Usage Examples + +### Linear Algebra Operations + +```python +import torch +import torch.linalg as LA + +# Matrix operations +A = torch.randn(3, 3) +B = torch.randn(3, 2) + +# Basic operations +det_A = LA.det(A) +inv_A = LA.inv(A) +X = LA.solve(A, B) # Solve AX = B + +# Matrix decompositions +U, S, V = LA.svd(A) +eigenvals, eigenvecs = LA.eig(A) +Q, R = LA.qr(A) + +# Norms and properties +frobenius_norm = LA.norm(A, 'fro') +spectral_norm = LA.norm(A, 2) +condition_number = LA.cond(A) +rank = LA.matrix_rank(A) + +print(f"Determinant: {det_A}") +print(f"Condition number: {condition_number}") +print(f"Rank: {rank}") +``` + +### FFT Operations + +```python +import torch +import torch.fft as fft + +# Create signal +t = torch.linspace(0, 1, 1000) +signal = torch.sin(2 * torch.pi * 5 * t) + torch.sin(2 * torch.pi * 10 * t) + +# FFT analysis +signal_fft = fft.fft(signal) +signal_rfft = fft.rfft(signal) # Real-valued input +frequencies = fft.fftfreq(len(signal)) + +# 2D FFT for images +image = torch.randn(256, 256) +image_fft = fft.fft2(image) +image_shifted = fft.fftshift(image_fft) + +# Inverse transform +reconstructed = fft.ifft(signal_fft).real + +print(f"Original signal shape: {signal.shape}") +print(f"FFT shape: {signal_fft.shape}") +print(f"Real FFT shape: {signal_rfft.shape}") +print(f"Reconstruction error: {torch.mean((signal - reconstructed) ** 2)}") +``` + +### Special Functions + +```python +import torch +import torch.special as special + +# Error functions +x = torch.linspace(-3, 3, 100) +erf_vals = special.erf(x) +erfc_vals = special.erfc(x) + +# Gamma functions +gamma_vals = torch.exp(special.gammaln(x + 1)) # Gamma function via log +digamma_vals = special.digamma(x + 1) + +# Bessel functions +bessel_j0 = special.bessel_j0(x) +bessel_y0 = special.bessel_y0(x + 0.1) # Avoid singularity at 0 + +# Probability functions +sigmoid_vals = special.expit(x) # Sigmoid +logit_vals = special.logit(torch.sigmoid(x)) # Should recover x + +# Normal distribution +ndtr_vals = special.ndtr(x) # CDF +log_ndtr_vals = special.log_ndtr(x) # Log CDF + +print(f"erf(1.0): {special.erf(torch.tensor(1.0))}") +print(f"Gamma(5): {torch.exp(special.gammaln(torch.tensor(5.0)))}") +print(f"Sigmoid(0): 
{special.expit(torch.tensor(0.0))}") +``` + +### Statistical Analysis + +```python +import torch + +# Generate sample data +data = torch.randn(1000, 10) + +# Basic statistics +mean_vals = torch.mean(data, dim=0) +std_vals = torch.std(data, dim=0) +var_vals = torch.var(data, dim=0) + +# Along different dimensions +overall_mean = torch.mean(data) +row_means = torch.mean(data, dim=1) + +# Quantiles and percentiles +median_vals = torch.median(data, dim=0).values +q25 = torch.quantile(data, 0.25, dim=0) +q75 = torch.quantile(data, 0.75, dim=0) + +# Correlation +correlation_matrix = torch.corrcoef(data.T) +covariance_matrix = torch.cov(data.T) + +# Histogram +values = torch.randn(10000) +hist, bin_edges = torch.histogram(values, bins=50) + +print(f"Data shape: {data.shape}") +print(f"Mean: {mean_vals[:5]}") # First 5 features +print(f"Std: {std_vals[:5]}") +print(f"Correlation matrix shape: {correlation_matrix.shape}") +``` + +### Sparse Matrix Operations + +```python +import torch + +# Create sparse matrices +indices = torch.LongTensor([[0, 1, 1], [2, 0, 2]]) +values = torch.FloatTensor([3, 4, 5]) +shape = (2, 3) +sparse_a = torch.sparse_coo_tensor(indices, values, shape) + +# Dense matrix +dense_b = torch.randn(3, 4) + +# Sparse matrix multiplication +result = torch.sparse.mm(sparse_a, dense_b) + +# Sparse addition with bias +bias = torch.randn(2, 4) +result_with_bias = torch.sparse.addmm(bias, sparse_a, dense_b) + +# Convert to dense for inspection +dense_a = sparse_a.to_dense() + +print(f"Sparse matrix:\n{dense_a}") +print(f"Result shape: {result.shape}") +print(f"Sparse matrix multiplication completed") +``` + +### Advanced Mathematical Operations + +```python +import torch +import torch.linalg as LA + +# Batch operations +batch_size = 32 +matrix_size = 64 +batch_matrices = torch.randn(batch_size, matrix_size, matrix_size) + +# Batch linear algebra +batch_det = LA.det(batch_matrices) +batch_eigenvals = LA.eigvals(batch_matrices) + +# Solve batch of linear systems +batch_rhs = torch.randn(batch_size, matrix_size, 10) +batch_solutions = LA.solve(batch_matrices, batch_rhs) + +# Batch SVD +U, S, V = LA.svd(batch_matrices) + +# Complex number operations +complex_tensor = torch.complex(torch.randn(100), torch.randn(100)) +complex_fft = torch.fft.fft(complex_tensor) +phase = torch.angle(complex_tensor) +magnitude = torch.abs(complex_tensor) + +print(f"Batch determinants shape: {batch_det.shape}") +print(f"Batch eigenvalues shape: {batch_eigenvals.shape}") +print(f"Complex tensor magnitude range: {magnitude.min():.3f} to {magnitude.max():.3f}") +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-torch/docs/neural-networks.md b/.tessl/tiles/tessl/pypi-torch/docs/neural-networks.md new file mode 100644 index 0000000..ae5285c --- /dev/null +++ b/.tessl/tiles/tessl/pypi-torch/docs/neural-networks.md @@ -0,0 +1,622 @@ +# Neural Networks + +Complete neural network building blocks including layers, activation functions, loss functions, and containers for building deep learning models. The torch.nn module provides high-level abstractions for neural network construction. + +## Capabilities + +### Base Classes + +Core classes that form the foundation of all neural network components. + +```python { .api } +class Module: + """Base class for all neural network modules.""" + def __init__(self): ... 
+ def forward(self, *input): + """Define forward computation.""" + def parameters(self, recurse=True): + """Return iterator over module parameters.""" + def named_parameters(self, prefix='', recurse=True): + """Return iterator over (name, parameter) pairs.""" + def modules(self): + """Return iterator over all modules.""" + def named_modules(self, memo=None, prefix=''): + """Return iterator over (name, module) pairs.""" + def children(self): + """Return iterator over immediate children modules.""" + def named_children(self): + """Return iterator over (name, child) pairs.""" + def train(self, mode=True): + """Set module in training mode.""" + def eval(self): + """Set module in evaluation mode.""" + def zero_grad(self, set_to_none=False): + """Set gradients to zero.""" + def to(self, *args, **kwargs): + """Move module to device/dtype.""" + def cuda(self, device=None): + """Move module to CUDA device.""" + def cpu(self): + """Move module to CPU.""" + def state_dict(self, destination=None, prefix='', keep_vars=False): + """Return dictionary of module state.""" + def load_state_dict(self, state_dict, strict=True): + """Load parameters and buffers.""" + +class Parameter(Tensor): + """Trainable parameter tensor.""" + def __init__(self, data=None, requires_grad=True): ... + +class UninitializedParameter(Parameter): + """Parameter that is not yet initialized.""" + def __init__(self, requires_grad=True): ... +``` + +### Linear Layers + +Dense layers that perform linear transformations. + +```python { .api } +class Linear(Module): + """Linear transformation: y = xA^T + b.""" + def __init__(self, in_features: int, out_features: int, bias: bool = True, device=None, dtype=None): ... + def forward(self, input: Tensor) -> Tensor: ... + +class Bilinear(Module): + """Bilinear transformation: y = x1^T A x2 + b.""" + def __init__(self, in1_features: int, in2_features: int, out_features: int, bias: bool = True): ... + def forward(self, input1: Tensor, input2: Tensor) -> Tensor: ... + +class LazyLinear(Module): + """Linear layer with lazy weight initialization.""" + def __init__(self, out_features: int, bias: bool = True): ... + def forward(self, input: Tensor) -> Tensor: ... + +class Identity(Module): + """Identity transformation.""" + def __init__(self, *args, **kwargs): ... + def forward(self, input: Tensor) -> Tensor: ... +``` + +### Convolution Layers + +Convolutional layers for spatial feature extraction. + +```python { .api } +class Conv1d(Module): + """1D convolution layer.""" + def __init__(self, in_channels: int, out_channels: int, kernel_size, stride=1, + padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros'): ... + def forward(self, input: Tensor) -> Tensor: ... + +class Conv2d(Module): + """2D convolution layer.""" + def __init__(self, in_channels: int, out_channels: int, kernel_size, stride=1, + padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros'): ... + def forward(self, input: Tensor) -> Tensor: ... + +class Conv3d(Module): + """3D convolution layer.""" + def __init__(self, in_channels: int, out_channels: int, kernel_size, stride=1, + padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros'): ... + def forward(self, input: Tensor) -> Tensor: ... + +class ConvTranspose1d(Module): + """1D transposed convolution layer.""" + def __init__(self, in_channels: int, out_channels: int, kernel_size, stride=1, + padding=0, output_padding=0, groups=1, bias=True, dilation=1, padding_mode='zeros'): ... + def forward(self, input: Tensor, output_size=None) -> Tensor: ... 
+ +class ConvTranspose2d(Module): + """2D transposed convolution layer.""" + def __init__(self, in_channels: int, out_channels: int, kernel_size, stride=1, + padding=0, output_padding=0, groups=1, bias=True, dilation=1, padding_mode='zeros'): ... + def forward(self, input: Tensor, output_size=None) -> Tensor: ... + +class ConvTranspose3d(Module): + """3D transposed convolution layer.""" + def __init__(self, in_channels: int, out_channels: int, kernel_size, stride=1, + padding=0, output_padding=0, groups=1, bias=True, dilation=1, padding_mode='zeros'): ... + def forward(self, input: Tensor, output_size=None) -> Tensor: ... +``` + +### Activation Functions + +Non-linear activation functions for introducing non-linearity. + +```python { .api } +class ReLU(Module): + """Rectified Linear Unit: max(0, x).""" + def __init__(self, inplace: bool = False): ... + def forward(self, input: Tensor) -> Tensor: ... + +class ReLU6(Module): + """ReLU clamped to maximum value of 6.""" + def __init__(self, inplace: bool = False): ... + def forward(self, input: Tensor) -> Tensor: ... + +class LeakyReLU(Module): + """Leaky ReLU: max(negative_slope * x, x).""" + def __init__(self, negative_slope: float = 0.01, inplace: bool = False): ... + def forward(self, input: Tensor) -> Tensor: ... + +class PReLU(Module): + """Parametric ReLU with learnable negative slope.""" + def __init__(self, num_parameters: int = 1, init: float = 0.25): ... + def forward(self, input: Tensor) -> Tensor: ... + +class ELU(Module): + """Exponential Linear Unit.""" + def __init__(self, alpha: float = 1.0, inplace: bool = False): ... + def forward(self, input: Tensor) -> Tensor: ... + +class SELU(Module): + """Scaled Exponential Linear Unit.""" + def __init__(self, inplace: bool = False): ... + def forward(self, input: Tensor) -> Tensor: ... + +class GELU(Module): + """Gaussian Error Linear Unit.""" + def __init__(self, approximate: str = 'none'): ... + def forward(self, input: Tensor) -> Tensor: ... + +class SiLU(Module): + """Sigmoid Linear Unit (Swish): x * sigmoid(x).""" + def __init__(self, inplace: bool = False): ... + def forward(self, input: Tensor) -> Tensor: ... + +class Mish(Module): + """Mish activation: x * tanh(softplus(x)).""" + def __init__(self, inplace: bool = False): ... + def forward(self, input: Tensor) -> Tensor: ... + +class Sigmoid(Module): + """Sigmoid activation: 1 / (1 + exp(-x)).""" + def __init__(self): ... + def forward(self, input: Tensor) -> Tensor: ... + +class Tanh(Module): + """Hyperbolic tangent activation.""" + def __init__(self): ... + def forward(self, input: Tensor) -> Tensor: ... + +class Softmax(Module): + """Softmax activation along specified dimension.""" + def __init__(self, dim=None): ... + def forward(self, input: Tensor) -> Tensor: ... + +class LogSoftmax(Module): + """Log-Softmax activation.""" + def __init__(self, dim=None): ... + def forward(self, input: Tensor) -> Tensor: ... +``` + +### Normalization Layers + +Normalization techniques for training stability and performance. + +```python { .api } +class BatchNorm1d(Module): + """Batch normalization for 2D or 3D inputs.""" + def __init__(self, num_features, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True): ... + def forward(self, input: Tensor) -> Tensor: ... + +class BatchNorm2d(Module): + """Batch normalization for 4D inputs.""" + def __init__(self, num_features, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True): ... + def forward(self, input: Tensor) -> Tensor: ... 
+ +class BatchNorm3d(Module): + """Batch normalization for 5D inputs.""" + def __init__(self, num_features, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True): ... + def forward(self, input: Tensor) -> Tensor: ... + +class LayerNorm(Module): + """Layer normalization.""" + def __init__(self, normalized_shape, eps=1e-05, elementwise_affine=True, bias=True): ... + def forward(self, input: Tensor) -> Tensor: ... + +class GroupNorm(Module): + """Group normalization.""" + def __init__(self, num_groups: int, num_channels: int, eps=1e-05, affine=True): ... + def forward(self, input: Tensor) -> Tensor: ... + +class InstanceNorm1d(Module): + """Instance normalization for 3D inputs.""" + def __init__(self, num_features, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False): ... + def forward(self, input: Tensor) -> Tensor: ... + +class InstanceNorm2d(Module): + """Instance normalization for 4D inputs.""" + def __init__(self, num_features, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False): ... + def forward(self, input: Tensor) -> Tensor: ... + +class InstanceNorm3d(Module): + """Instance normalization for 5D inputs.""" + def __init__(self, num_features, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False): ... + def forward(self, input: Tensor) -> Tensor: ... +``` + +### Pooling Layers + +Pooling operations for spatial dimension reduction. + +```python { .api } +class MaxPool1d(Module): + """1D max pooling.""" + def __init__(self, kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False): ... + def forward(self, input: Tensor) -> Tensor: ... + +class MaxPool2d(Module): + """2D max pooling.""" + def __init__(self, kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False): ... + def forward(self, input: Tensor) -> Tensor: ... + +class MaxPool3d(Module): + """3D max pooling.""" + def __init__(self, kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False): ... + def forward(self, input: Tensor) -> Tensor: ... + +class AvgPool1d(Module): + """1D average pooling.""" + def __init__(self, kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True): ... + def forward(self, input: Tensor) -> Tensor: ... + +class AvgPool2d(Module): + """2D average pooling.""" + def __init__(self, kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True, divisor_override=None): ... + def forward(self, input: Tensor) -> Tensor: ... + +class AvgPool3d(Module): + """3D average pooling.""" + def __init__(self, kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True, divisor_override=None): ... + def forward(self, input: Tensor) -> Tensor: ... + +class AdaptiveMaxPool1d(Module): + """1D adaptive max pooling.""" + def __init__(self, output_size, return_indices=False): ... + def forward(self, input: Tensor) -> Tensor: ... + +class AdaptiveMaxPool2d(Module): + """2D adaptive max pooling.""" + def __init__(self, output_size, return_indices=False): ... + def forward(self, input: Tensor) -> Tensor: ... + +class AdaptiveAvgPool1d(Module): + """1D adaptive average pooling.""" + def __init__(self, output_size): ... + def forward(self, input: Tensor) -> Tensor: ... + +class AdaptiveAvgPool2d(Module): + """2D adaptive average pooling.""" + def __init__(self, output_size): ... + def forward(self, input: Tensor) -> Tensor: ... +``` + +### Loss Functions + +Loss functions for training neural networks. 
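+
+A quick sketch contrasting the common regression and classification losses listed below (shapes and values are illustrative):
+
+```python
+import torch
+import torch.nn as nn
+
+# Regression: predictions and targets share the same shape
+mse = nn.MSELoss()
+print("MSE:", mse(torch.randn(8, 1), torch.randn(8, 1)).item())
+
+# Multi-class classification: raw logits vs. class indices
+ce = nn.CrossEntropyLoss()
+logits = torch.randn(8, 5)               # 8 samples, 5 classes
+labels = torch.randint(0, 5, (8,))
+print("Cross entropy:", ce(logits, labels).item())
+
+# Binary / multi-label classification: logits vs. {0, 1} float targets
+bce = nn.BCEWithLogitsLoss()
+bin_logits = torch.randn(8, 3)
+bin_targets = torch.randint(0, 2, (8, 3)).float()
+print("BCE with logits:", bce(bin_logits, bin_targets).item())
+```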
+ +```python { .api } +class MSELoss(Module): + """Mean Squared Error loss.""" + def __init__(self, size_average=None, reduce=None, reduction='mean'): ... + def forward(self, input: Tensor, target: Tensor) -> Tensor: ... + +class L1Loss(Module): + """Mean Absolute Error loss.""" + def __init__(self, size_average=None, reduce=None, reduction='mean'): ... + def forward(self, input: Tensor, target: Tensor) -> Tensor: ... + +class CrossEntropyLoss(Module): + """Cross entropy loss for classification.""" + def __init__(self, weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='mean', label_smoothing=0.0): ... + def forward(self, input: Tensor, target: Tensor) -> Tensor: ... + +class NLLLoss(Module): + """Negative log likelihood loss.""" + def __init__(self, weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='mean'): ... + def forward(self, input: Tensor, target: Tensor) -> Tensor: ... + +class BCELoss(Module): + """Binary cross entropy loss.""" + def __init__(self, weight=None, size_average=None, reduce=None, reduction='mean'): ... + def forward(self, input: Tensor, target: Tensor) -> Tensor: ... + +class BCEWithLogitsLoss(Module): + """Binary cross entropy with sigmoid.""" + def __init__(self, weight=None, size_average=None, reduce=None, reduction='mean', pos_weight=None): ... + def forward(self, input: Tensor, target: Tensor) -> Tensor: ... + +class KLDivLoss(Module): + """Kullback-Leibler divergence loss.""" + def __init__(self, size_average=None, reduce=None, reduction='mean', log_target=False): ... + def forward(self, input: Tensor, target: Tensor) -> Tensor: ... + +class SmoothL1Loss(Module): + """Smooth L1 loss (Huber loss).""" + def __init__(self, size_average=None, reduce=None, reduction='mean', beta=1.0): ... + def forward(self, input: Tensor, target: Tensor) -> Tensor: ... + +class HuberLoss(Module): + """Huber loss.""" + def __init__(self, reduction='mean', delta=1.0): ... + def forward(self, input: Tensor, target: Tensor) -> Tensor: ... +``` + +### Recurrent Neural Networks + +RNN, LSTM, and GRU layers for sequential data processing. + +```python { .api } +class RNN(Module): + """Multi-layer RNN with tanh or ReLU non-linearity.""" + def __init__(self, input_size, hidden_size, num_layers=1, nonlinearity='tanh', bias=True, + batch_first=False, dropout=0.0, bidirectional=False): ... + def forward(self, input, h_0=None) -> Tuple[Tensor, Tensor]: ... + +class LSTM(Module): + """Multi-layer Long Short-Term Memory network.""" + def __init__(self, input_size, hidden_size, num_layers=1, bias=True, batch_first=False, + dropout=0.0, bidirectional=False, proj_size=0): ... + def forward(self, input, hx=None) -> Tuple[Tensor, Tuple[Tensor, Tensor]]: ... + +class GRU(Module): + """Multi-layer Gated Recurrent Unit network.""" + def __init__(self, input_size, hidden_size, num_layers=1, bias=True, batch_first=False, + dropout=0.0, bidirectional=False): ... + def forward(self, input, h_0=None) -> Tuple[Tensor, Tensor]: ... + +class RNNCell(Module): + """RNN cell.""" + def __init__(self, input_size, hidden_size, bias=True, nonlinearity='tanh'): ... + def forward(self, input, hidden) -> Tensor: ... + +class LSTMCell(Module): + """LSTM cell.""" + def __init__(self, input_size, hidden_size, bias=True): ... + def forward(self, input, hx=None) -> Tuple[Tensor, Tensor]: ... + +class GRUCell(Module): + """GRU cell.""" + def __init__(self, input_size, hidden_size, bias=True): ... + def forward(self, input, hidden) -> Tensor: ... 
+``` + +### Transformer Components + +Transformer architecture components for attention-based models. + +```python { .api } +class Transformer(Module): + """Complete transformer model.""" + def __init__(self, d_model=512, nhead=8, num_encoder_layers=6, num_decoder_layers=6, + dim_feedforward=2048, dropout=0.1, activation='relu', custom_encoder=None, custom_decoder=None): ... + def forward(self, src, tgt, src_mask=None, tgt_mask=None, memory_mask=None) -> Tensor: ... + +class TransformerEncoder(Module): + """Stack of transformer encoder layers.""" + def __init__(self, encoder_layer, num_layers, norm=None): ... + def forward(self, src, mask=None, src_key_padding_mask=None) -> Tensor: ... + +class TransformerEncoderLayer(Module): + """Single transformer encoder layer.""" + def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1, activation='relu', batch_first=False): ... + def forward(self, src, src_mask=None, src_key_padding_mask=None) -> Tensor: ... + +class TransformerDecoder(Module): + """Stack of transformer decoder layers.""" + def __init__(self, decoder_layer, num_layers, norm=None): ... + def forward(self, tgt, memory, tgt_mask=None, memory_mask=None) -> Tensor: ... + +class TransformerDecoderLayer(Module): + """Single transformer decoder layer.""" + def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1, activation='relu', batch_first=False): ... + def forward(self, tgt, memory, tgt_mask=None, memory_mask=None) -> Tensor: ... + +class MultiheadAttention(Module): + """Multi-head attention mechanism.""" + def __init__(self, embed_dim, num_heads, dropout=0.0, bias=True, add_bias_kv=False, add_zero_attn=False, kdim=None, vdim=None, batch_first=False): ... + def forward(self, query, key, value, key_padding_mask=None, need_weights=True, attn_mask=None) -> Tuple[Tensor, Tensor]: ... +``` + +### Container Classes + +Containers for organizing and combining multiple modules. + +```python { .api } +class Sequential(Module): + """Sequential container of modules.""" + def __init__(self, *args): ... + def forward(self, input): ... + +class ModuleList(Module): + """List container for modules.""" + def __init__(self, modules=None): ... + def append(self, module): ... + def extend(self, modules): ... + def insert(self, index, module): ... + +class ModuleDict(Module): + """Dictionary container for modules.""" + def __init__(self, modules=None): ... + def __getitem__(self, key): ... + def __setitem__(self, key, module): ... + def keys(self): ... + def items(self): ... + def values(self): ... + +class ParameterList(Module): + """List container for parameters.""" + def __init__(self, parameters=None): ... + def append(self, parameter): ... + def extend(self, parameters): ... + +class ParameterDict(Module): + """Dictionary container for parameters.""" + def __init__(self, parameters=None): ... + def __getitem__(self, key): ... + def __setitem__(self, key, parameter): ... +``` + +### Dropout and Regularization + +Regularization techniques to prevent overfitting. + +```python { .api } +class Dropout(Module): + """Randomly zeros elements with probability p.""" + def __init__(self, p=0.5, inplace=False): ... + def forward(self, input: Tensor) -> Tensor: ... + +class Dropout1d(Module): + """1D channel-wise dropout.""" + def __init__(self, p=0.5, inplace=False): ... + def forward(self, input: Tensor) -> Tensor: ... + +class Dropout2d(Module): + """2D channel-wise dropout.""" + def __init__(self, p=0.5, inplace=False): ... + def forward(self, input: Tensor) -> Tensor: ... 
+ +class Dropout3d(Module): + """3D channel-wise dropout.""" + def __init__(self, p=0.5, inplace=False): ... + def forward(self, input: Tensor) -> Tensor: ... + +class AlphaDropout(Module): + """Alpha dropout for SELU networks.""" + def __init__(self, p=0.5, inplace=False): ... + def forward(self, input: Tensor) -> Tensor: ... +``` + +### Embedding Layers + +Embedding layers for discrete inputs like words or tokens. + +```python { .api } +class Embedding(Module): + """Lookup table for embeddings.""" + def __init__(self, num_embeddings: int, embedding_dim: int, padding_idx=None, max_norm=None, + norm_type=2.0, scale_grad_by_freq=False, sparse=False): ... + def forward(self, input: Tensor) -> Tensor: ... + +class EmbeddingBag(Module): + """Embedding bag for variable length sequences.""" + def __init__(self, num_embeddings: int, embedding_dim: int, max_norm=None, norm_type=2.0, + scale_grad_by_freq=False, mode='mean', sparse=False, include_last_offset=False): ... + def forward(self, input: Tensor, offsets=None, per_sample_weights=None) -> Tensor: ... +``` + +## Usage Examples + +### Simple Neural Network + +```python +import torch +import torch.nn as nn +import torch.optim as optim + +class SimpleNet(nn.Module): + def __init__(self, input_size, hidden_size, output_size): + super(SimpleNet, self).__init__() + self.layers = nn.Sequential( + nn.Linear(input_size, hidden_size), + nn.ReLU(), + nn.Linear(hidden_size, hidden_size), + nn.ReLU(), + nn.Linear(hidden_size, output_size) + ) + + def forward(self, x): + return self.layers(x) + +# Initialize model +model = SimpleNet(784, 128, 10) +criterion = nn.CrossEntropyLoss() +optimizer = optim.Adam(model.parameters(), lr=0.001) + +# Example forward pass +x = torch.randn(32, 784) # Batch of 32 samples +y = torch.randint(0, 10, (32,)) # Labels + +output = model(x) +loss = criterion(output, y) + +# Backward pass +optimizer.zero_grad() +loss.backward() +optimizer.step() + +print(f"Loss: {loss.item()}") +``` + +### Convolutional Neural Network + +```python +import torch +import torch.nn as nn + +class CNN(nn.Module): + def __init__(self, num_classes=10): + super(CNN, self).__init__() + self.features = nn.Sequential( + nn.Conv2d(3, 32, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=2), + nn.Conv2d(32, 64, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.MaxPool2d(kernel_size=2), + nn.Conv2d(64, 128, kernel_size=3, padding=1), + nn.ReLU(inplace=True), + nn.AdaptiveAvgPool2d((1, 1)) + ) + self.classifier = nn.Sequential( + nn.Dropout(0.5), + nn.Linear(128, num_classes) + ) + + def forward(self, x): + x = self.features(x) + x = torch.flatten(x, 1) + x = self.classifier(x) + return x + +# Initialize model +model = CNN(num_classes=10) + +# Example forward pass +x = torch.randn(8, 3, 32, 32) # Batch of images +output = model(x) +print(f"Output shape: {output.shape}") +``` + +### LSTM for Sequence Processing + +```python +import torch +import torch.nn as nn + +class LSTMModel(nn.Module): + def __init__(self, vocab_size, embed_size, hidden_size, num_layers, num_classes): + super(LSTMModel, self).__init__() + self.embedding = nn.Embedding(vocab_size, embed_size) + self.lstm = nn.LSTM(embed_size, hidden_size, num_layers, batch_first=True) + self.fc = nn.Linear(hidden_size, num_classes) + self.dropout = nn.Dropout(0.5) + + def forward(self, x): + embedded = self.embedding(x) + lstm_out, (hidden, cell) = self.lstm(embedded) + # Use the last output + output = self.fc(self.dropout(lstm_out[:, -1, :])) + return output + +# 
Initialize model +model = LSTMModel(vocab_size=10000, embed_size=128, hidden_size=256, num_layers=2, num_classes=5) + +# Example forward pass +x = torch.randint(0, 10000, (16, 50)) # Batch of sequences +output = model(x) +print(f"Output shape: {output.shape}") +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-torch/docs/tensor-operations.md b/.tessl/tiles/tessl/pypi-torch/docs/tensor-operations.md new file mode 100644 index 0000000..03a4b00 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-torch/docs/tensor-operations.md @@ -0,0 +1,453 @@ +# Tensor Operations + +Core tensor creation, manipulation, and mathematical operations that form the foundation of PyTorch's computational capabilities. These operations support automatic differentiation and GPU acceleration. + +## Capabilities + +### Tensor Creation + +Create tensors from data, with specific values, or using random initialization patterns. + +```python { .api } +def tensor(data, *, dtype=None, device=None, requires_grad=False, pin_memory=False) -> Tensor: + """ + Construct a tensor with data. + + Parameters: + - data: Initial data (list, tuple, ndarray, scalar, tensor) + - dtype: Data type (torch.float32, torch.int64, etc.) + - device: Device placement (torch.device or string) + - requires_grad: Enable automatic differentiation + - pin_memory: Use pinned memory for faster GPU transfer + + Returns: + Tensor with specified data and properties + """ + +def zeros(*size, dtype=None, device=None, requires_grad=False) -> Tensor: + """Create tensor filled with zeros.""" + +def ones(*size, dtype=None, device=None, requires_grad=False) -> Tensor: + """Create tensor filled with ones.""" + +def empty(*size, dtype=None, device=None, requires_grad=False) -> Tensor: + """Create uninitialized tensor.""" + +def full(size, fill_value, *, dtype=None, device=None, requires_grad=False) -> Tensor: + """Create tensor filled with specific value.""" + +def eye(n, m=None, *, dtype=None, device=None, requires_grad=False) -> Tensor: + """Create identity matrix.""" +``` + +### Random Tensor Creation + +Generate tensors with random values from various distributions. + +```python { .api } +def rand(*size, dtype=None, device=None, requires_grad=False) -> Tensor: + """Random values from uniform distribution [0, 1).""" + +def randn(*size, dtype=None, device=None, requires_grad=False) -> Tensor: + """Random values from standard normal distribution.""" + +def randint(low=0, high, size, *, dtype=None, device=None, requires_grad=False) -> Tensor: + """Random integers from [low, high).""" + +def randperm(n, *, dtype=torch.int64, device=None, requires_grad=False) -> Tensor: + """Random permutation of integers 0 to n-1.""" + +def multinomial(input, num_samples, replacement=False, *, generator=None) -> Tensor: + """Sample from multinomial distribution.""" +``` + +### Range and Sequence Creation + +Create tensors with sequential or linearly spaced values. + +```python { .api } +def arange(start=0, end, step=1, *, dtype=None, device=None, requires_grad=False) -> Tensor: + """Values from start to end with step.""" + +def linspace(start, end, steps, *, dtype=None, device=None, requires_grad=False) -> Tensor: + """Linearly spaced values from start to end.""" + +def logspace(start, end, steps, base=10.0, *, dtype=None, device=None, requires_grad=False) -> Tensor: + """Logarithmically spaced values.""" +``` + +### Tensor Conversion and Creation from Existing Data + +Convert between PyTorch tensors and other data structures. 
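+
+A short sketch of the conversion and joining functions documented below (assumes
+NumPy is installed; shapes are illustrative):
+
+```python
+import numpy as np
+import torch
+
+# from_numpy shares memory with the source array: mutating one mutates the other
+arr = np.array([1.0, 2.0, 3.0])
+t = torch.from_numpy(arr)
+arr[0] = 99.0
+print(t)  # tensor([99., 2., 3.], dtype=torch.float64)
+
+# cat joins tensors along an existing dimension, stack adds a new one
+a = torch.zeros(2, 3)
+b = torch.ones(2, 3)
+print(torch.cat([a, b], dim=0).shape)    # torch.Size([4, 3])
+print(torch.stack([a, b], dim=0).shape)  # torch.Size([2, 2, 3])
+```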
+ +```python { .api } +def from_numpy(ndarray) -> Tensor: + """Create tensor from NumPy array (shares memory).""" + +def as_tensor(data, dtype=None, device=None) -> Tensor: + """Convert data to tensor, avoiding copy if possible.""" + +def stack(tensors, dim=0) -> Tensor: + """Stack tensors along new dimension.""" + +def cat(tensors, dim=0) -> Tensor: + """Concatenate tensors along existing dimension.""" + +def hstack(tensors) -> Tensor: + """Stack tensors horizontally (column-wise).""" + +def vstack(tensors) -> Tensor: + """Stack tensors vertically (row-wise).""" + +def dstack(tensors) -> Tensor: + """Stack tensors depth-wise (along third dimension).""" +``` + +### Shape Manipulation + +Reshape, transpose, and manipulate tensor dimensions. + +```python { .api } +def reshape(input, shape) -> Tensor: + """Return tensor with new shape.""" + +def view(input, *shape) -> Tensor: + """Return tensor with new shape (shares memory).""" + +def squeeze(input, dim=None) -> Tensor: + """Remove dimensions of size 1.""" + +def unsqueeze(input, dim) -> Tensor: + """Add dimension of size 1.""" + +def transpose(input, dim0, dim1) -> Tensor: + """Swap two dimensions.""" + +def permute(input, dims) -> Tensor: + """Permute dimensions.""" + +def flatten(input, start_dim=0, end_dim=-1) -> Tensor: + """Flatten dimensions.""" + +def flip(input, dims) -> Tensor: + """Reverse tensor along specified dimensions.""" +``` + +### Tensor Splitting and Joining + +Split tensors into chunks or join multiple tensors. + +```python { .api } +def split(tensor, split_size_or_sections, dim=0) -> List[Tensor]: + """Split tensor into chunks.""" + +def chunk(input, chunks, dim=0) -> List[Tensor]: + """Split tensor into specific number of chunks.""" + +def unbind(input, dim=0) -> List[Tensor]: + """Remove dimension and return sequence of tensors.""" + +def meshgrid(*tensors, indexing='ij') -> List[Tensor]: + """Create coordinate grids.""" +``` + +### Indexing and Selection + +Advanced indexing operations for selecting and manipulating tensor elements. + +```python { .api } +def gather(input, dim, index) -> Tensor: + """Gather values along axis specified by index.""" + +def scatter(input, dim, index, src) -> Tensor: + """Scatter values along axis specified by index.""" + +def scatter_add(input, dim, index, src) -> Tensor: + """Scatter and add values.""" + +def index_select(input, dim, index) -> Tensor: + """Select elements along dimension.""" + +def masked_select(input, mask) -> Tensor: + """Select elements where mask is True.""" + +def nonzero(input, *, as_tuple=False) -> Tensor: + """Return indices of non-zero elements.""" + +def where(condition, x, y) -> Tensor: + """Select elements from x or y based on condition.""" +``` + +### Element-wise Mathematical Operations + +Basic arithmetic and mathematical functions applied element-wise. 
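+
+A minimal sketch of the element-wise functions listed below; most also broadcast
+across compatible shapes and have operator equivalents (`+`, `*`, and so on):
+
+```python
+import torch
+
+x = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
+y = torch.tensor([10.0, 20.0])           # broadcasts across the rows of x
+
+print(torch.add(x, y))                   # same as x + y
+print(torch.mul(x, 2.0))                 # same as x * 2
+print(torch.add(x, y, alpha=0.5))        # x + 0.5 * y
+print(torch.sqrt(torch.square(x)))       # element-wise composition
+```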
+ +```python { .api } +def add(input, other, *, alpha=1) -> Tensor: + """Add tensors element-wise.""" + +def sub(input, other, *, alpha=1) -> Tensor: + """Subtract tensors element-wise.""" + +def mul(input, other) -> Tensor: + """Multiply tensors element-wise.""" + +def div(input, other, *, rounding_mode=None) -> Tensor: + """Divide tensors element-wise.""" + +def pow(input, exponent) -> Tensor: + """Raise to power element-wise.""" + +def abs(input) -> Tensor: + """Absolute value element-wise.""" + +def neg(input) -> Tensor: + """Negate elements.""" + +def sign(input) -> Tensor: + """Sign of elements (-1, 0, 1).""" + +def sqrt(input) -> Tensor: + """Square root element-wise.""" + +def square(input) -> Tensor: + """Square element-wise.""" + +def exp(input) -> Tensor: + """Exponential function element-wise.""" + +def log(input) -> Tensor: + """Natural logarithm element-wise.""" + +def log10(input) -> Tensor: + """Base-10 logarithm element-wise.""" + +def log2(input) -> Tensor: + """Base-2 logarithm element-wise.""" +``` + +### Trigonometric Functions + +Trigonometric and hyperbolic functions. + +```python { .api } +def sin(input) -> Tensor: + """Sine element-wise.""" + +def cos(input) -> Tensor: + """Cosine element-wise.""" + +def tan(input) -> Tensor: + """Tangent element-wise.""" + +def asin(input) -> Tensor: + """Arcsine element-wise.""" + +def acos(input) -> Tensor: + """Arccosine element-wise.""" + +def atan(input) -> Tensor: + """Arctangent element-wise.""" + +def atan2(input, other) -> Tensor: + """Two-argument arctangent.""" + +def sinh(input) -> Tensor: + """Hyperbolic sine element-wise.""" + +def cosh(input) -> Tensor: + """Hyperbolic cosine element-wise.""" + +def tanh(input) -> Tensor: + """Hyperbolic tangent element-wise.""" +``` + +### Comparison Operations + +Element-wise comparison operations returning boolean tensors. + +```python { .api } +def eq(input, other) -> Tensor: + """Element-wise equality.""" + +def ne(input, other) -> Tensor: + """Element-wise inequality.""" + +def lt(input, other) -> Tensor: + """Element-wise less than.""" + +def le(input, other) -> Tensor: + """Element-wise less than or equal.""" + +def gt(input, other) -> Tensor: + """Element-wise greater than.""" + +def ge(input, other) -> Tensor: + """Element-wise greater than or equal.""" + +def equal(input, other) -> bool: + """True if tensors are element-wise equal.""" + +def allclose(input, other, rtol=1e-05, atol=1e-08, equal_nan=False) -> bool: + """True if tensors are approximately equal.""" +``` + +### Reduction Operations + +Operations that reduce tensor dimensions by aggregating values. 
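+
+A quick sketch of the reduction functions listed below, showing the `dim` and
+`keepdim` arguments (values are illustrative):
+
+```python
+import torch
+
+x = torch.tensor([[1.0, 5.0], [3.0, 2.0]])
+
+print(torch.sum(x))                        # all elements -> tensor(11.)
+print(torch.mean(x, dim=0))                # column means -> tensor([2.0000, 3.5000])
+print(torch.sum(x, dim=1, keepdim=True))   # keeps the reduced dim -> shape (2, 1)
+
+# max/min along a dimension return both values and indices
+values, indices = torch.max(x, dim=1)
+print(values, indices)                     # tensor([5., 3.]) tensor([1, 0])
+print(torch.argmax(x))                     # index into the flattened tensor -> tensor(1)
+```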
+ +```python { .api } +def sum(input, dim=None, keepdim=False, *, dtype=None) -> Tensor: + """Sum of tensor elements.""" + +def mean(input, dim=None, keepdim=False, *, dtype=None) -> Tensor: + """Mean of tensor elements.""" + +def median(input, dim=None, keepdim=False) -> Tensor: + """Median of tensor elements.""" + +def mode(input, dim=None, keepdim=False) -> Tensor: + """Mode of tensor elements.""" + +def std(input, dim=None, keepdim=False, *, dtype=None) -> Tensor: + """Standard deviation.""" + +def var(input, dim=None, keepdim=False, *, dtype=None) -> Tensor: + """Variance.""" + +def max(input, dim=None, keepdim=False) -> Tensor: + """Maximum values.""" + +def min(input, dim=None, keepdim=False) -> Tensor: + """Minimum values.""" + +def argmax(input, dim=None, keepdim=False) -> Tensor: + """Indices of maximum values.""" + +def argmin(input, dim=None, keepdim=False) -> Tensor: + """Indices of minimum values.""" + +def prod(input, dim=None, keepdim=False, *, dtype=None) -> Tensor: + """Product of tensor elements.""" + +def all(input, dim=None, keepdim=False) -> Tensor: + """True if all elements are True.""" + +def any(input, dim=None, keepdim=False) -> Tensor: + """True if any elements are True.""" +``` + +### Linear Algebra Operations + +Core linear algebra operations for matrices and vectors. + +```python { .api } +def matmul(input, other) -> Tensor: + """Matrix multiplication.""" + +def mm(input, mat2) -> Tensor: + """Matrix multiplication (2D tensors only).""" + +def bmm(input, mat2) -> Tensor: + """Batch matrix multiplication.""" + +def dot(input, other) -> Tensor: + """Dot product of vectors.""" + +def mv(input, vec) -> Tensor: + """Matrix-vector multiplication.""" + +def outer(input, vec2) -> Tensor: + """Outer product of vectors.""" + +def cross(input, other, dim=None) -> Tensor: + """Cross product.""" + +def norm(input, p='fro', dim=None, keepdim=False, *, dtype=None) -> Tensor: + """Matrix or vector norm.""" +``` + +### Tensor Properties and Utilities + +Functions for inspecting and manipulating tensor properties. 
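+
+A brief sketch of the inspection utilities listed below (using the equivalent
+`Tensor` method forms for `clone` and `detach`):
+
+```python
+import torch
+
+x = torch.randn(3, 4, requires_grad=True)
+
+print(torch.is_tensor(x))           # True
+print(torch.numel(x))               # 12
+print(torch.is_floating_point(x))   # True
+
+# clone copies the data but stays on the autograd graph;
+# detach returns a tensor that no longer tracks gradients
+y = x.clone()
+z = x.detach()
+print(y.requires_grad, z.requires_grad)  # True False
+```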
+ +```python { .api } +def is_tensor(obj) -> bool: + """Check if object is a tensor.""" + +def numel(input) -> int: + """Number of elements in tensor.""" + +def typename(o) -> str: + """Type name of tensor.""" + +def is_floating_point(input) -> bool: + """Check if tensor has floating point data type.""" + +def is_complex(input) -> bool: + """Check if tensor has complex data type.""" + +def is_signed(input) -> bool: + """Check if tensor has signed data type.""" + +def clone(input) -> Tensor: + """Create copy of tensor.""" + +def detach(input) -> Tensor: + """Detach tensor from computation graph.""" +``` + +## Usage Examples + +### Basic Tensor Operations + +```python +import torch + +# Create tensors +x = torch.tensor([[1, 2], [3, 4]], dtype=torch.float32) +y = torch.rand(2, 2) + +# Basic operations +z = torch.add(x, y) +product = torch.matmul(x, y) +mean_val = torch.mean(x) + +# Shape manipulation +reshaped = torch.reshape(x, (4,)) +transposed = torch.transpose(x, 0, 1) + +# Indexing +selected = torch.index_select(x, 0, torch.tensor([0])) +mask = x > 2 +masked = torch.masked_select(x, mask) + +print(f"Original: {x}") +print(f"Sum: {z}") +print(f"Mean: {mean_val}") +print(f"Reshaped: {reshaped}") +print(f"Masked: {masked}") +``` + +### GPU Operations + +```python +import torch + +# Check CUDA availability +if torch.cuda.is_available(): + device = torch.device('cuda') + + # Create tensors on GPU + x = torch.tensor([[1, 2], [3, 4]], device=device, dtype=torch.float32) + y = torch.rand(2, 2, device=device) + + # Operations are performed on GPU + z = torch.matmul(x, y) + + # Move back to CPU if needed + z_cpu = z.cpu() + + print(f"GPU result: {z}") + print(f"CPU result: {z_cpu}") +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-torch/docs/training.md b/.tessl/tiles/tessl/pypi-torch/docs/training.md new file mode 100644 index 0000000..9d69960 --- /dev/null +++ b/.tessl/tiles/tessl/pypi-torch/docs/training.md @@ -0,0 +1,631 @@ +# Training and Optimization + +Optimizers, learning rate schedulers, and training utilities for model optimization and parameter updates. The torch.optim module provides optimization algorithms and learning rate scheduling strategies. + +## Capabilities + +### Optimizers + +Optimization algorithms for updating model parameters during training. + +```python { .api } +class Optimizer: + """Base class for all optimizers.""" + def __init__(self, params, defaults): ... + def state_dict(self): + """Return optimizer state dictionary.""" + def load_state_dict(self, state_dict): + """Load optimizer state.""" + def zero_grad(self, set_to_none: bool = False): + """Set gradients to zero.""" + def step(self, closure=None): + """Perform optimization step.""" + def add_param_group(self, param_group): + """Add parameter group.""" +``` + +### SGD Optimizers + +Stochastic Gradient Descent and variants. + +```python { .api } +class SGD(Optimizer): + """Stochastic Gradient Descent optimizer.""" + def __init__(self, params, lr, momentum=0, dampening=0, weight_decay=0, nesterov=False): + """ + Parameters: + - params: Iterable of parameters or parameter groups + - lr: Learning rate + - momentum: Momentum factor (default: 0) + - dampening: Dampening for momentum (default: 0) + - weight_decay: Weight decay (L2 penalty) (default: 0) + - nesterov: Enable Nesterov momentum (default: False) + """ + def step(self, closure=None): ... 
+ +class ASGD(Optimizer): + """Averaged Stochastic Gradient Descent.""" + def __init__(self, params, lr=1e-2, lambd=1e-4, alpha=0.75, t0=1e6, weight_decay=0): + """ + Parameters: + - params: Iterable of parameters + - lr: Learning rate (default: 1e-2) + - lambd: Decay term (default: 1e-4) + - alpha: Power for eta update (default: 0.75) + - t0: Point at which to start averaging (default: 1e6) + - weight_decay: Weight decay (default: 0) + """ + def step(self, closure=None): ... +``` + +### Adam-family Optimizers + +Adam and its variants for adaptive learning rates. + +```python { .api } +class Adam(Optimizer): + """Adam optimizer.""" + def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0, amsgrad=False): + """ + Parameters: + - params: Iterable of parameters + - lr: Learning rate (default: 1e-3) + - betas: Coefficients for momentum and squared gradient averaging (default: (0.9, 0.999)) + - eps: Term for numerical stability (default: 1e-8) + - weight_decay: Weight decay (default: 0) + - amsgrad: Use AMSGrad variant (default: False) + """ + def step(self, closure=None): ... + +class AdamW(Optimizer): + """AdamW optimizer with decoupled weight decay.""" + def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=1e-2, amsgrad=False): + """ + Parameters: + - params: Iterable of parameters + - lr: Learning rate (default: 1e-3) + - betas: Coefficients for momentum and squared gradient averaging + - eps: Term for numerical stability + - weight_decay: Weight decay coefficient (default: 1e-2) + - amsgrad: Use AMSGrad variant + """ + def step(self, closure=None): ... + +class Adamax(Optimizer): + """Adamax optimizer (Adam based on infinity norm).""" + def __init__(self, params, lr=2e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0): + """ + Parameters: + - params: Iterable of parameters + - lr: Learning rate (default: 2e-3) + - betas: Coefficients for momentum and squared gradient averaging + - eps: Term for numerical stability + - weight_decay: Weight decay + """ + def step(self, closure=None): ... + +class NAdam(Optimizer): + """NAdam optimizer (Adam with Nesterov momentum).""" + def __init__(self, params, lr=2e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0, momentum_decay=4e-3): + """ + Parameters: + - params: Iterable of parameters + - lr: Learning rate (default: 2e-3) + - betas: Coefficients for momentum and squared gradient averaging + - eps: Term for numerical stability + - weight_decay: Weight decay + - momentum_decay: Momentum decay + """ + def step(self, closure=None): ... + +class RAdam(Optimizer): + """RAdam optimizer (Rectified Adam).""" + def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0): + """ + Parameters: + - params: Iterable of parameters + - lr: Learning rate (default: 1e-3) + - betas: Coefficients for momentum and squared gradient averaging + - eps: Term for numerical stability + - weight_decay: Weight decay + """ + def step(self, closure=None): ... +``` + +### Adaptive Learning Rate Optimizers + +Optimizers that adapt learning rates based on gradient history. 
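+
+These optimizers are drop-in replacements in the usual zero_grad/backward/step
+loop. A minimal, hedged sketch with RMSprop (the model and data here are
+placeholders, not part of the API):
+
+```python
+import torch
+import torch.nn as nn
+import torch.optim as optim
+
+model = nn.Linear(10, 1)
+optimizer = optim.RMSprop(model.parameters(), lr=1e-2, alpha=0.99, momentum=0.9)
+criterion = nn.MSELoss()
+
+x = torch.randn(16, 10)
+y = torch.randn(16, 1)
+
+optimizer.zero_grad()
+loss = criterion(model(x), y)
+loss.backward()
+optimizer.step()
+print(f"Loss: {loss.item():.4f}")
+```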
+ +```python { .api } +class Adagrad(Optimizer): + """Adagrad optimizer.""" + def __init__(self, params, lr=1e-2, lr_decay=0, weight_decay=0, initial_accumulator_value=0, eps=1e-10): + """ + Parameters: + - params: Iterable of parameters + - lr: Learning rate (default: 1e-2) + - lr_decay: Learning rate decay (default: 0) + - weight_decay: Weight decay (default: 0) + - initial_accumulator_value: Initial value for accumulator + - eps: Term for numerical stability + """ + def step(self, closure=None): ... + +class Adadelta(Optimizer): + """Adadelta optimizer.""" + def __init__(self, params, lr=1.0, rho=0.9, eps=1e-6, weight_decay=0): + """ + Parameters: + - params: Iterable of parameters + - lr: Coefficient that scales delta (default: 1.0) + - rho: Coefficient for squared gradient averaging (default: 0.9) + - eps: Term for numerical stability (default: 1e-6) + - weight_decay: Weight decay (default: 0) + """ + def step(self, closure=None): ... + +class RMSprop(Optimizer): + """RMSprop optimizer.""" + def __init__(self, params, lr=1e-2, alpha=0.99, eps=1e-8, weight_decay=0, momentum=0, centered=False): + """ + Parameters: + - params: Iterable of parameters + - lr: Learning rate (default: 1e-2) + - alpha: Smoothing constant (default: 0.99) + - eps: Term for numerical stability (default: 1e-8) + - weight_decay: Weight decay (default: 0) + - momentum: Momentum factor (default: 0) + - centered: Compute centered RMSprop (default: False) + """ + def step(self, closure=None): ... + +class Rprop(Optimizer): + """Rprop optimizer.""" + def __init__(self, params, lr=1e-2, etas=(0.5, 1.2), step_sizes=(1e-6, 50)): + """ + Parameters: + - params: Iterable of parameters + - lr: Learning rate (default: 1e-2) + - etas: Pair of (etaminus, etaplus) for multiplicative increase/decrease + - step_sizes: Pair of minimal and maximal allowed step sizes + """ + def step(self, closure=None): ... +``` + +### Advanced Optimizers + +Specialized optimization algorithms. + +```python { .api } +class LBFGS(Optimizer): + """Limited-memory BFGS optimizer.""" + def __init__(self, params, lr=1, max_iter=20, max_eval=None, tolerance_grad=1e-7, + tolerance_change=1e-9, history_size=100, line_search_fn=None): + """ + Parameters: + - params: Iterable of parameters + - lr: Learning rate (default: 1) + - max_iter: Maximum number of iterations per optimization step + - max_eval: Maximum number of function evaluations per step + - tolerance_grad: Termination tolerance on first order optimality + - tolerance_change: Termination tolerance on function/parameter changes + - history_size: Update history size + - line_search_fn: Line search function ('strong_wolfe' or None) + """ + def step(self, closure): ... + +class SparseAdam(Optimizer): + """Adam optimizer for sparse tensors.""" + def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8): + """ + Parameters: + - params: Iterable of parameters + - lr: Learning rate (default: 1e-3) + - betas: Coefficients for momentum and squared gradient averaging + - eps: Term for numerical stability + """ + def step(self, closure=None): ... 
+ +class Adafactor(Optimizer): + """Adafactor optimizer for memory-efficient training.""" + def __init__(self, params, lr=None, eps2=1e-30, cliping_threshold=1.0, decay_rate=-0.8, + beta1=None, weight_decay=0.0, scale_parameter=True, relative_step=True): + """ + Parameters: + - params: Iterable of parameters + - lr: Learning rate (None for automatic scaling) + - eps2: Regularization constant for second moment + - cliping_threshold: Threshold of root mean square of final gradient update + - decay_rate: Coefficient for moving average of squared gradient + - beta1: Coefficient for moving average of gradient + - weight_decay: Weight decay + - scale_parameter: Scale learning rate by root mean square of parameter + - relative_step: Set learning rate relative to current step + """ + def step(self, closure=None): ... +``` + +### Learning Rate Schedulers + +Learning rate scheduling strategies for training optimization. + +```python { .api } +class LRScheduler: + """Base class for learning rate schedulers.""" + def __init__(self, optimizer, last_epoch=-1, verbose=False): ... + def state_dict(self): + """Return scheduler state dictionary.""" + def load_state_dict(self, state_dict): + """Load scheduler state.""" + def get_last_lr(self): + """Return last computed learning rates.""" + def step(self, epoch=None): + """Update learning rates.""" + +class StepLR(LRScheduler): + """Decay learning rate by gamma every step_size epochs.""" + def __init__(self, optimizer, step_size, gamma=0.1, last_epoch=-1, verbose=False): + """ + Parameters: + - optimizer: Wrapped optimizer + - step_size: Period of learning rate decay + - gamma: Multiplicative factor of learning rate decay (default: 0.1) + - last_epoch: Index of last epoch (default: -1) + - verbose: Print message on every update (default: False) + """ + +class MultiStepLR(LRScheduler): + """Decay learning rate by gamma at specified milestones.""" + def __init__(self, optimizer, milestones, gamma=0.1, last_epoch=-1, verbose=False): + """ + Parameters: + - optimizer: Wrapped optimizer + - milestones: List of epoch indices for decay + - gamma: Multiplicative factor of learning rate decay + - last_epoch: Index of last epoch + - verbose: Print message on every update + """ + +class ExponentialLR(LRScheduler): + """Decay learning rate by gamma every epoch.""" + def __init__(self, optimizer, gamma, last_epoch=-1, verbose=False): + """ + Parameters: + - optimizer: Wrapped optimizer + - gamma: Multiplicative factor of learning rate decay + - last_epoch: Index of last epoch + - verbose: Print message on every update + """ + +class CosineAnnealingLR(LRScheduler): + """Cosine annealing learning rate schedule.""" + def __init__(self, optimizer, T_max, eta_min=0, last_epoch=-1, verbose=False): + """ + Parameters: + - optimizer: Wrapped optimizer + - T_max: Maximum number of iterations + - eta_min: Minimum learning rate (default: 0) + - last_epoch: Index of last epoch + - verbose: Print message on every update + """ + +class CosineAnnealingWarmRestarts(LRScheduler): + """Cosine annealing with warm restarts.""" + def __init__(self, optimizer, T_0, T_mult=1, eta_min=0, last_epoch=-1, verbose=False): + """ + Parameters: + - optimizer: Wrapped optimizer + - T_0: Number of iterations for first restart + - T_mult: Factor to increase T_i after restart (default: 1) + - eta_min: Minimum learning rate (default: 0) + - last_epoch: Index of last epoch + - verbose: Print message on every update + """ + +class ReduceLROnPlateau: + """Reduce learning rate when metric stops improving.""" + 
def __init__(self, optimizer, mode='min', factor=0.1, patience=10, verbose=False, + threshold=1e-4, threshold_mode='rel', cooldown=0, min_lr=0, eps=1e-8): + """ + Parameters: + - optimizer: Wrapped optimizer + - mode: 'min' or 'max' for metric improvement direction + - factor: Factor to reduce learning rate (default: 0.1) + - patience: Number of epochs with no improvement to wait + - verbose: Print message when reducing lr + - threshold: Threshold for measuring new optimum + - threshold_mode: 'rel' or 'abs' for threshold comparison + - cooldown: Number of epochs to wait before resuming normal operation + - min_lr: Lower bound on learning rate + - eps: Minimal decay applied to lr + """ + def step(self, metrics, epoch=None): ... + +class CyclicLR(LRScheduler): + """Cyclical learning rate policy.""" + def __init__(self, optimizer, base_lr, max_lr, step_size_up=2000, step_size_down=None, + mode='triangular', gamma=1.0, scale_fn=None, scale_mode='cycle', cycle_momentum=True, + base_momentum=0.8, max_momentum=0.9, last_epoch=-1, verbose=False): + """ + Parameters: + - optimizer: Wrapped optimizer + - base_lr: Lower learning rate boundary + - max_lr: Upper learning rate boundary + - step_size_up: Number of training iterations in increasing half + - step_size_down: Number of training iterations in decreasing half + - mode: 'triangular', 'triangular2', or 'exp_range' + - gamma: Constant in 'exp_range' scaling function + - scale_fn: Custom scaling policy function + - scale_mode: 'cycle' or 'iterations' + - cycle_momentum: Cycle momentum inversely to learning rate + - base_momentum: Lower momentum boundary + - max_momentum: Upper momentum boundary + - last_epoch: Index of last epoch + - verbose: Print message on every update + """ + +class OneCycleLR(LRScheduler): + """One cycle learning rate policy.""" + def __init__(self, optimizer, max_lr, total_steps=None, epochs=None, steps_per_epoch=None, + pct_start=0.3, anneal_strategy='cos', cycle_momentum=True, base_momentum=0.85, + max_momentum=0.95, div_factor=25.0, final_div_factor=1e4, three_phase=False, last_epoch=-1, verbose=False): + """ + Parameters: + - optimizer: Wrapped optimizer + - max_lr: Upper learning rate boundary + - total_steps: Total number of steps in cycle + - epochs: Number of epochs (alternative to total_steps) + - steps_per_epoch: Steps per epoch (with epochs) + - pct_start: Percentage of cycle spent increasing learning rate + - anneal_strategy: 'cos' or 'linear' annealing strategy + - cycle_momentum: Cycle momentum inversely to learning rate + - base_momentum: Lower momentum boundary + - max_momentum: Upper momentum boundary + - div_factor: Determines initial learning rate (max_lr/div_factor) + - final_div_factor: Determines minimum learning rate (max_lr/(div_factor*final_div_factor)) + - three_phase: Use three phase schedule + - last_epoch: Index of last epoch + - verbose: Print message on every update + """ +``` + +### Gradient Processing + +Utilities for gradient manipulation and processing. + +```python { .api } +def clip_grad_norm_(parameters, max_norm, norm_type=2.0, error_if_nonfinite=False): + """ + Clip gradient norm of parameters. + + Parameters: + - parameters: Iterable of parameters or single tensor + - max_norm: Maximum norm of gradients + - norm_type: Type of norm (default: 2.0) + - error_if_nonfinite: Raise error if total norm is NaN or inf + + Returns: + Total norm of the parameters + """ + +def clip_grad_value_(parameters, clip_value): + """ + Clip gradient values to specified range. 
+ + Parameters: + - parameters: Iterable of parameters or single tensor + - clip_value: Maximum absolute value for gradients + """ +``` + +### Stochastic Weight Averaging + +Utilities for stochastic weight averaging to improve generalization. + +```python { .api } +class AveragedModel(nn.Module): + """Averaged model for stochastic weight averaging.""" + def __init__(self, model, device=None, avg_fn=None, use_buffers=False): + """ + Parameters: + - model: Model to average + - device: Device to store averaged parameters + - avg_fn: Function to compute running average + - use_buffers: Whether to average buffers + """ + def update_parameters(self, model): ... + +class SWALR(LRScheduler): + """Learning rate scheduler for stochastic weight averaging.""" + def __init__(self, optimizer, swa_lr, anneal_epochs=10, anneal_strategy='cos', last_epoch=-1): + """ + Parameters: + - optimizer: Wrapped optimizer + - swa_lr: SWA learning rate + - anneal_epochs: Number of epochs for annealing (default: 10) + - anneal_strategy: 'cos' or 'linear' annealing strategy + - last_epoch: Index of last epoch + """ +``` + +## Usage Examples + +### Basic Training Loop + +```python +import torch +import torch.nn as nn +import torch.optim as optim +from torch.utils.data import DataLoader + +# Setup model, loss, and optimizer +model = nn.Sequential( + nn.Linear(784, 128), + nn.ReLU(), + nn.Linear(128, 10) +) +criterion = nn.CrossEntropyLoss() +optimizer = optim.Adam(model.parameters(), lr=0.001) + +# Training loop +def train_epoch(model, dataloader, criterion, optimizer): + model.train() + total_loss = 0 + + for batch_idx, (data, targets) in enumerate(dataloader): + # Zero gradients + optimizer.zero_grad() + + # Forward pass + outputs = model(data) + loss = criterion(outputs, targets) + + # Backward pass + loss.backward() + + # Gradient clipping (optional) + torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0) + + # Update parameters + optimizer.step() + + total_loss += loss.item() + + return total_loss / len(dataloader) + +# Example usage +# train_loader = DataLoader(dataset, batch_size=32, shuffle=True) +# loss = train_epoch(model, train_loader, criterion, optimizer) +# print(f"Training loss: {loss:.4f}") +``` + +### Learning Rate Scheduling + +```python +import torch +import torch.optim as optim +from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau + +# Setup optimizer and scheduler +optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9) +scheduler = StepLR(optimizer, step_size=30, gamma=0.1) + +# Alternative: Reduce on plateau +# scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5) + +# Training loop with scheduler +for epoch in range(100): + train_loss = train_epoch(model, train_loader, criterion, optimizer) + val_loss = validate(model, val_loader, criterion) + + # Step scheduler + scheduler.step() # For StepLR + # scheduler.step(val_loss) # For ReduceLROnPlateau + + current_lr = optimizer.param_groups[0]['lr'] + print(f"Epoch {epoch}: Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, LR: {current_lr:.6f}") +``` + +### Advanced Optimization with Multiple Parameter Groups + +```python +import torch +import torch.optim as optim + +# Different learning rates for different parts of the model +model = nn.Sequential( + nn.Linear(784, 128), + nn.ReLU(), + nn.Linear(128, 10) +) + +# Create parameter groups +params = [ + {'params': model[0].parameters(), 'lr': 0.001}, # First layer + {'params': model[2].parameters(), 'lr': 0.01} # Last layer +] + +optimizer = 
optim.Adam(params, weight_decay=1e-4) + +# Training with different learning rates +for epoch in range(100): + for batch_idx, (data, targets) in enumerate(train_loader): + optimizer.zero_grad() + outputs = model(data) + loss = criterion(outputs, targets) + loss.backward() + optimizer.step() +``` + +### Stochastic Weight Averaging + +```python +import torch +import torch.optim as optim +from torch.optim.swa_utils import AveragedModel, SWALR + +# Setup model and optimizer +model = nn.Sequential(nn.Linear(10, 5), nn.ReLU(), nn.Linear(5, 1)) +optimizer = optim.SGD(model.parameters(), lr=0.1) + +# Create averaged model and SWA scheduler +swa_model = AveragedModel(model) +swa_scheduler = SWALR(optimizer, swa_lr=0.05) + +# Training with SWA +swa_start_epoch = 80 +for epoch in range(100): + train_loss = train_epoch(model, train_loader, criterion, optimizer) + + if epoch >= swa_start_epoch: + swa_model.update_parameters(model) + swa_scheduler.step() + else: + # Regular scheduler before SWA + regular_scheduler.step() + + print(f"Epoch {epoch}: Loss: {train_loss:.4f}") + +# Update SWA batch normalization statistics +torch.optim.swa_utils.update_bn(train_loader, swa_model) + +# Use SWA model for inference +swa_model.eval() +``` + +### One Cycle Learning Rate Policy + +```python +import torch +import torch.optim as optim +from torch.optim.lr_scheduler import OneCycleLR + +# Setup optimizer +optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9) + +# One cycle scheduler +steps_per_epoch = len(train_loader) +scheduler = OneCycleLR( + optimizer, + max_lr=0.1, + epochs=100, + steps_per_epoch=steps_per_epoch, + pct_start=0.3, + div_factor=25, + final_div_factor=1e4 +) + +# Training loop +for epoch in range(100): + for batch_idx, (data, targets) in enumerate(train_loader): + optimizer.zero_grad() + outputs = model(data) + loss = criterion(outputs, targets) + loss.backward() + optimizer.step() + + # Step after each batch + scheduler.step() + + print(f"Epoch {epoch}: LR: {optimizer.param_groups[0]['lr']:.6f}") +``` \ No newline at end of file diff --git a/.tessl/tiles/tessl/pypi-torch/tile.json b/.tessl/tiles/tessl/pypi-torch/tile.json new file mode 100644 index 0000000..e70a90a --- /dev/null +++ b/.tessl/tiles/tessl/pypi-torch/tile.json @@ -0,0 +1,7 @@ +{ + "name": "tessl/pypi-torch", + "version": "2.8.0", + "docs": "docs/index.md", + "describes": "pkg:pypi/torch@2.8.0", + "summary": "Deep learning framework providing tensor computation with GPU acceleration and dynamic neural networks with automatic differentiation" +} \ No newline at end of file diff --git a/tessl.json b/tessl.json index 06cd46b..a21906c 100644 --- a/tessl.json +++ b/tessl.json @@ -97,6 +97,30 @@ }, "tessl/pypi-wcwidth": { "version": "0.2.0" + }, + "tessl/pypi-anthropic": { + "version": "0.75.0" + }, + "tessl/pypi-testcontainers": { + "version": "4.12.0" + }, + "tessl/pypi-pandas": { + "version": "2.3.0" + }, + "tessl/pypi-torch": { + "version": "2.8.0" + }, + "tessl/pypi-tensorflow": { + "version": "2.20.0" + }, + "tessl/pypi-jax": { + "version": "0.7.0" + }, + "tessl/pypi-pyarrow": { + "version": "21.0.0" + }, + "tessl/pypi-pyrsistent": { + "version": "0.20.0" } } }