codeflash-agent/.tessl/tiles/tessl/pypi-tensorflow/docs/image.md

# Image Processing

Comprehensive image manipulation, transformation, and computer vision operations for preprocessing and augmentation. These operations provide the tools needed for image-based machine learning workflows.

## Capabilities

### Image Decoding and Encoding

Operations for reading and writing images in various formats.

```python { .api }
def decode_image(contents, channels=None, dtype=tf.uint8, name=None, expand_animations=True):
    """
    Detect whether an image is a BMP, GIF, JPEG, or PNG and decode it with the
    matching op (decode_bmp, decode_gif, decode_jpeg, or decode_png).

    Parameters:
    - contents: 0-D. The encoded image bytes
    - channels: An optional int. Defaults to None, which behaves like 0 (use the number of channels in the encoded image). Number of color channels for the decoded image
    - dtype: The desired DType of the returned Tensor
    - name: A name for the operation
    - expand_animations: An optional bool. Defaults to True. Controls the shape of the returned op's output. If True, PNG, JPEG, and BMP decode to a 3-D tensor and GIFs (animated or not) to a 4-D tensor; if False, every format decodes to a 3-D tensor and animated GIFs are truncated to the first frame

    Returns:
    Tensor with type dtype and a 3- or 4-dimensional shape, depending on the file type and expand_animations
    """

def decode_jpeg(contents, channels=0, ratio=1, fancy_upsampling=True,
                try_recover_truncated=False, acceptable_fraction=1,
                dct_method="", name=None):
    """
    Decode a JPEG-encoded image to a uint8 tensor.

    Parameters:
    - contents: A Tensor of type string. 0-D. The JPEG-encoded image
    - channels: An optional int. Defaults to 0. Number of color channels for the decoded image
    - ratio: An optional int. Defaults to 1. Downscaling ratio
    - fancy_upsampling: An optional bool. Defaults to True. If true use a slower but nicer upsampling
    - try_recover_truncated: An optional bool. Defaults to False. If true try to recover an image from truncated input
    - acceptable_fraction: An optional float. Defaults to 1. The minimum required fraction of lines before a truncated input is accepted
    - dct_method: An optional string. Defaults to "". string specifying a hint about the algorithm used for decompression
    - name: A name for the operation

    Returns:
    A Tensor of type uint8
    """

def decode_png(contents, channels=0, dtype=tf.uint8, name=None):
    """
    Decode a PNG-encoded image to a uint8 or uint16 tensor.

    Parameters:
    - contents: A Tensor of type string. 0-D. The PNG-encoded image
    - channels: An optional int. Defaults to 0. Number of color channels for the decoded image
    - dtype: An optional tf.DType from: tf.uint8, tf.uint16. Defaults to tf.uint8
    - name: A name for the operation

    Returns:
    A Tensor of type dtype
    """

def encode_jpeg(image, format="", quality=95, progressive=False,
                optimize_size=False, chroma_downsampling=True,
                density_unit="in", x_density=300, y_density=300,
                xmp_metadata="", name=None):
    """
    JPEG-encode an image.

    Parameters:
    - image: A Tensor of type uint8. 3-D with shape [height, width, channels]
    - format: An optional string from: "", "grayscale", "rgb". Defaults to ""
    - quality: An optional int. Defaults to 95. Quality of the compression from 0 to 100
    - progressive: An optional bool. Defaults to False. If True, create a JPEG that loads progressively
    - optimize_size: An optional bool. Defaults to False. If True, spend CPU/RAM to reduce size with no quality change
    - chroma_downsampling: An optional bool. Defaults to True. See http://en.wikipedia.org/wiki/Chroma_subsampling
    - density_unit: An optional string from: "in", "cm". Defaults to "in". Unit used to specify x_density and y_density
    - x_density: An optional int. Defaults to 300. Horizontal pixels per density unit
    - y_density: An optional int. Defaults to 300. Vertical pixels per density unit
    - xmp_metadata: An optional string. Defaults to "". If not empty, embed this XMP metadata in the image header
    - name: A name for the operation

    Returns:
    A Tensor of type string
    """

def encode_png(image, compression=-1, name=None):
    """
    PNG-encode an image.

    Parameters:
    - image: A Tensor. Must be one of the following types: uint8, uint16. 3-D with shape [height, width, channels]
    - compression: An optional int. Defaults to -1. Compression level
    - name: A name for the operation

    Returns:
    A Tensor of type string
    """
```

### Image Resizing and Cropping

Operations for resizing and cropping images.

```python { .api }
def resize(images, size, method=ResizeMethod.BILINEAR, preserve_aspect_ratio=False,
           antialias=False, name=None):
    """
    Resize images to size using the specified method.

    Parameters:
    - images: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]
    - size: A 1-D int32 Tensor of 2 elements: new_height, new_width
    - method: An image.ResizeMethod, or string equivalent. Defaults to bilinear
    - preserve_aspect_ratio: Whether to preserve the aspect ratio. Defaults to False
    - antialias: Whether to use an anti-aliasing filter when downsampling an image. Defaults to False
    - name: A name for this operation

    Returns:
    If images was 4-D, a 4-D float Tensor of shape [batch, new_height, new_width, channels].
    If images was 3-D, a 3-D float Tensor of shape [new_height, new_width, channels]
    """

def resize_with_pad(image, target_height, target_width, method=ResizeMethod.BILINEAR, antialias=False):
    """
    Resizes and pads an image to a target width and height.

    Parameters:
    - image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]
    - target_height: Target height
    - target_width: Target width
    - method: An image.ResizeMethod, or string equivalent
    - antialias: Whether to use an anti-aliasing filter when downsampling an image

    Returns:
    Resized and padded image
    """

def crop_to_bounding_box(image, offset_height, offset_width, target_height, target_width):
    """
    Crops an image to a specified bounding box.

    Parameters:
    - image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]
    - offset_height: Vertical coordinate of the top-left corner of the result in the input
    - offset_width: Horizontal coordinate of the top-left corner of the result in the input
    - target_height: Height of the result
    - target_width: Width of the result

    Returns:
    Cropped image(s)
    """

def central_crop(image, central_fraction):
    """
    Crop the central region of the image(s).

    Parameters:
    - image: Either a 3-D float Tensor of shape [height, width, depth], or a 4-D Tensor of shape [batch_size, height, width, depth]
    - central_fraction: float (0, 1], fraction of size to crop

    Returns:
    3-D / 4-D float Tensor, as per the input
    """

def random_crop(value, size, seed=None, name=None):
    """
    Randomly crops a tensor to a given size.

    Parameters:
    - value: Input tensor to crop
    - size: 1-D tensor with one entry per dimension of value (its length equals the rank of value)
    - seed: A Python integer. Used to create a random seed
    - name: A name for this operation

    Returns:
    A cropped tensor of the same rank as value and shape size
    """
```

### Image Transformations

Geometric transformations and spatial manipulations.

```python { .api }
def flip_left_right(image):
    """
    Flip an image horizontally (left to right).

    Parameters:
    - image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]

    Returns:
    A tensor of the same type and shape as image
    """

def flip_up_down(image):
    """
    Flip an image vertically (upside down).

    Parameters:
    - image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]

    Returns:
    A tensor of the same type and shape as image
    """

def transpose(image, name=None):
    """
    Transpose image(s) by swapping the height and width dimension.

    Parameters:
    - image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]
    - name: A name for this operation

    Returns:
    A tensor of the same type as image with height and width swapped:
    [batch, width, height, channels] for 4-D input, [width, height, channels] for 3-D input
    """

def rot90(image, k=1, name=None):
    """
    Rotate image(s) counter-clockwise by 90 degrees.

    Parameters:
    - image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]
    - k: A scalar integer tensor. The number of times the image is rotated by 90 degrees. Defaults to 1
    - name: A name for this operation

    Returns:
    A rotated tensor of the same type as image. Height and width are swapped
    when k is odd; otherwise the shape matches image
    """

def random_flip_left_right(image, seed=None):
    """
    Randomly flip an image horizontally (left to right).

    Parameters:
    - image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]
    - seed: A Python integer. Used to create a random seed

    Returns:
    A tensor of the same type and shape as image
    """

def random_flip_up_down(image, seed=None):
    """
    Randomly flips an image vertically (upside down).

    Parameters:
    - image: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]
    - seed: A Python integer. Used to create a random seed

    Returns:
    A tensor of the same type and shape as image
    """
```

### Color Space and Enhancement

Operations for color manipulation and image enhancement.

```python { .api }
def rgb_to_grayscale(images, name=None):
    """
    Converts one or more images from RGB to Grayscale.

    Parameters:
    - images: The RGB tensor to convert. The last dimension must have size 3 and should contain RGB values
    - name: A name for the operation

    Returns:
    The converted grayscale image(s)
    """

def grayscale_to_rgb(images, name=None):
    """
    Converts one or more images from Grayscale to RGB.

    Parameters:
    - images: The Grayscale tensor to convert. Last dimension must be size 1
    - name: A name for the operation

    Returns:
    The converted RGB image(s)
    """

def rgb_to_hsv(images, name=None):
    """
    Converts one or more images from RGB to HSV.

    Parameters:
    - images: A Tensor. Must be one of the following types: half, bfloat16, float32, float64
    - name: A name for the operation

    Returns:
    A Tensor. Has the same type as images
    """

def hsv_to_rgb(images, name=None):
    """
    Converts one or more images from HSV to RGB.

    Parameters:
    - images: A Tensor. Must be one of the following types: half, bfloat16, float32, float64
    - name: A name for the operation

    Returns:
    A Tensor. Has the same type as images
    """

def adjust_brightness(image, delta):
    """
    Adjust the brightness of RGB or Grayscale images.

    Parameters:
    - image: RGB image or images to adjust
    - delta: A scalar. Amount to add to the pixel values

    Returns:
    The brightness-adjusted image(s)
    """

def adjust_contrast(images, contrast_factor):
    """
    Adjust contrast of RGB or grayscale images.

    Parameters:
    - images: Images to adjust. At least 3-D
    - contrast_factor: A float multiplier for adjusting contrast

    Returns:
    The contrast-adjusted image or images
    """

def adjust_hue(image, delta, name=None):
    """
    Adjust hue of RGB images.

    Parameters:
    - image: RGB image or images. The image hue is adjusted by converting the image(s) to HSV and rotating the hue channel (H)
    - delta: float. How much to add to the hue channel
    - name: A name for this operation

    Returns:
    The hue-adjusted image or images
    """

def adjust_saturation(image, saturation_factor, name=None):
    """
    Adjust saturation of RGB images.

    Parameters:
    - image: RGB image or images. The image saturation is adjusted by converting the image to HSV and multiplying the saturation (S)
    - saturation_factor: float. Factor to multiply the saturation by
    - name: A name for this operation

    Returns:
    The saturation-adjusted image or images
    """

def random_brightness(image, max_delta, seed=None):
    """
    Adjust the brightness of images by a random factor.

    Parameters:
    - image: An image or images to adjust
    - max_delta: float, must be non-negative
    - seed: A Python integer. Used to create a random seed

    Returns:
    The brightness-adjusted image(s)
    """

def random_contrast(image, lower, upper, seed=None):
    """
    Adjust the contrast of an image or images by a random factor.

    Parameters:
    - image: An image tensor with 3 or more dimensions
    - lower: float. Lower bound for the random contrast factor
    - upper: float. Upper bound for the random contrast factor
    - seed: A Python integer. Used to create a random seed

    Returns:
    The contrast-adjusted tensor
    """
```

### Image Quality and Metrics

Operations for measuring image quality and computing metrics.

```python { .api }
def psnr(a, b, max_val, name=None):
    """
    Returns the Peak Signal-to-Noise Ratio between a and b.

    Parameters:
    - a: First set of images
    - b: Second set of images
    - max_val: The dynamic range of the images (i.e., the difference between the maximum and the minimum allowed values)
    - name: Namespace to embed the computation in

    Returns:
    The scalar PSNR between a and b. The returned tensor has type tf.float32 and shape [batch_size, 1]
    """

def ssim(img1, img2, max_val, filter_size=11, filter_sigma=1.5, k1=0.01, k2=0.03):
    """
    Computes SSIM index between img1 and img2.

    Parameters:
    - img1: First image batch
    - img2: Second image batch
    - max_val: The dynamic range of the images (i.e., the difference between the maximum and the minimum allowed values)
    - filter_size: Default value 11 (size of gaussian filter)
    - filter_sigma: Default value 1.5 (width of gaussian filter)
    - k1: Default value 0.01
    - k2: Default value 0.03 (SSIM is less sensitive to K2 for lower values, so it should be larger than K1)

    Returns:
    A tensor containing an SSIM value for each image in batch
    """

def total_variation(images, name=None):
    """
    Calculate and return the total variation for one or more images.

    Parameters:
    - images: 4-D Tensor of shape [batch, height, width, channels] or 3-D Tensor of shape [height, width, channels]
    - name: A name for the operation

    Returns:
    The total variation of images. For 4-D input, a 1-D Tensor of shape [batch]
    with one value per image; for 3-D input, a scalar
    """
```

## Usage Examples

```python
import tensorflow as tf
import numpy as np

# Read and decode images
image_string = tf.io.read_file('path/to/image.jpg')
image = tf.image.decode_jpeg(image_string, channels=3)

# Resize image
resized_image = tf.image.resize(image, [224, 224])

# Random augmentations
augmented_image = tf.image.random_flip_left_right(image)
augmented_image = tf.image.random_brightness(augmented_image, max_delta=0.1)
augmented_image = tf.image.random_contrast(augmented_image, lower=0.8, upper=1.2)

# Crop operations
central_cropped = tf.image.central_crop(image, central_fraction=0.8)
random_cropped = tf.image.random_crop(image, size=[100, 100, 3])

# Color space conversions
grayscale = tf.image.rgb_to_grayscale(image)
# rgb_to_hsv accepts only floating-point input, while decode_jpeg returns
# uint8, so rescale the image to float32 in [0, 1] before converting
float_image = tf.image.convert_image_dtype(image, tf.float32)
hsv_image = tf.image.rgb_to_hsv(float_image)

# Image processing pipeline for training
def preprocess_image(image_path, label):
    """
    Load one JPEG file and turn it into an augmented training example.

    Parameters:
    - image_path: Path of the JPEG file to read
    - label: Label paired with the image; returned unchanged

    Returns:
    Tuple (image, label) where image is the 224x224 float32 tensor after augmentation
    """
    # Load the file, decode to 3 channels, and bring it to the fixed input size
    encoded = tf.io.read_file(image_path)
    decoded = tf.image.decode_jpeg(encoded, channels=3)
    resized = tf.image.resize(decoded, [224, 224])
    scaled = tf.cast(resized, tf.float32) / 255.0

    # Data augmentation: random flip, then brightness, then contrast
    flipped = tf.image.random_flip_left_right(scaled)
    brightened = tf.image.random_brightness(flipped, max_delta=0.1)
    augmented = tf.image.random_contrast(brightened, lower=0.9, upper=1.1)

    return augmented, label

# Batch processing
batch_size = 32
# Replace these with your own files and labels. The lists must contain only
# real values (no `...` placeholder) and have one label per path, because
# from_tensor_slices converts every element to a tensor.
image_paths = ["path1.jpg", "path2.jpg"]  # List of image paths
labels = [0, 1]  # Corresponding labels

dataset = tf.data.Dataset.from_tensor_slices((image_paths, labels))
dataset = dataset.map(preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)
dataset = dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

# Quality metrics
# Two batches, each holding a single 256x256 image with 3 channels
img1 = tf.random.uniform([1, 256, 256, 3])
img2 = tf.random.uniform([1, 256, 256, 3])

# max_val is the dynamic range of the inputs; 1.0 matches float images in [0, 1]
psnr_value = tf.image.psnr(img1, img2, max_val=1.0)
ssim_value = tf.image.ssim(img1, img2, max_val=1.0)

# Both results hold one value per image in the batch
print(f"PSNR: {psnr_value.numpy()}")
print(f"SSIM: {ssim_value.numpy()}")
```