ToTensor in PyTorch


*Memos:

  • My post explains how to convert and scale a PIL Image to an Image in PyTorch.
  • My post explains Compose().
  • My post explains ToImage().
  • My post explains OxfordIIITPet().

ToTensor() can convert a PIL (Pillow library) Image, an Image or an ndarray to a tensor and scale the values of a PIL Image or an ndarray to [0.0, 1.0] as shown below:
*Memos:

  • ToTensor() is deprecated, so according to the doc you should use Compose(transforms=[ToImage(), ToDtype(dtype=torch.float32, scale=True)]) instead (see the sketch after this list).
  • A PIL Image is scaled to [0.0, 1.0] if the PIL Image belongs to one of the modes (L, LA, P, I, F, RGB, YCbCr, RGBA, CMYK, 1).
  • An ndarray is scaled to [0.0, 1.0] only if its dtype is uint8, whose value range is [0, 255].
  • The 1st argument is img (Required-Type: PIL Image, Image or tensor/ndarray (int/float/complex/bool)): *Memos:
    • A tensor can be 0D or more D.
    • An ndarray must be 2D or 3D.
    • Don't use img=.
  • Using v2 is recommended according to V1 or V2? Which one should I use?.
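
Since ToTensor() is deprecated, here is a minimal sketch of the recommended replacement, assuming torchvision's v2 API; it shows that the composed transform produces the same scaled float32 values as ToTensor(), just wrapped as an Image (a tensor subclass) instead of a plain tensor:

from torchvision.transforms.v2 import Compose, ToImage, ToDtype, ToTensor
import torch
import numpy as np
from PIL import Image

# The replacement recommended by the deprecation message.
transform = Compose(transforms=[ToImage(), ToDtype(dtype=torch.float32, scale=True)])

pil_img = Image.fromarray(np.array([[[0, 128, 255]]], dtype=np.uint8))  # dummy 1x1 RGB image

transform(pil_img)  # an Image (tensor subclass), dtype float32, values 0.0, ~0.502, 1.0
ToTensor()(pil_img) # a plain tensor with the same float32 values
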
from torchvision.datasets import OxfordIIITPet
from torchvision.transforms.v2 import ToImage, ToTensor
import torch
import numpy as np

ToTensor()
# ToTensor()
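
# *Memo: These examples assume the Oxford-IIIT Pet dataset already exists under data/;
# on a first run you may need to pass download=True to OxfordIIITPet().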

PILImage_data = OxfordIIITPet(
    root="data",
    transform=None
)

Image_data = OxfordIIITPet(
    root="data",
    transform=ToImage()
)

Tensor_data = OxfordIIITPet(
    root="data",
    transform=ToTensor()
)

Tensor_data
# Dataset OxfordIIITPet
#     Number of datapoints: 3680
#     Root location: data
#     StandardTransform
# Transform: ToTensor()

Tensor_data[0]
# (tensor([[[0.1451, 0.1373, 0.1412, ..., 0.9686, 0.9765, 0.9765],
#           [0.1373, 0.1373, 0.1451, ..., 0.9647, 0.9725, 0.9765],
#           ...,
#           [0.1098, 0.1098, 0.1059, ..., 0.2314, 0.2549, 0.2980]],
#          [[0.0784, 0.0706, 0.0745, ..., 0.9725, 0.9725, 0.9725],
#           [0.0706, 0.0706, 0.0784, ..., 0.9686, 0.9686, 0.9725],
#           ...,
#           [0.1059, 0.1059, 0.1059, ..., 0.3686, 0.4157, 0.4588]],
#          [[0.0471, 0.0392, 0.0431, ..., 0.9922, 0.9922, 0.9922],
#           [0.0392, 0.0392, 0.0471, ..., 0.9843, 0.9882, 0.9922],
#           ...,
#           [0.1373, 0.1373, 0.1373, ..., 0.8392, 0.9098, 0.8745]]]), 0)

Tensor_data[0][0].size()
# torch.Size([3, 500, 394])
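# i.e. channels-first (C, H, W): 3 channels, height 500, width 394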

Tensor_data[0][0]
# tensor([[[0.1451, 0.1373, 0.1412, ..., 0.9686, 0.9765, 0.9765],
#          [0.1373, 0.1373, 0.1451, ..., 0.9647, 0.9725, 0.9765],
#          ...,
#          [0.1098, 0.1098, 0.1059, ..., 0.2314, 0.2549, 0.2980]],
#         [[0.0784, 0.0706, 0.0745, ..., 0.9725, 0.9725, 0.9725],
#          [0.0706, 0.0706, 0.0784, ..., 0.9686, 0.9686, 0.9725],
#          ...,
#          [0.1059, 0.1059, 0.1059, ..., 0.3686, 0.4157, 0.4588]],
#         [[0.0471, 0.0392, 0.0431, ..., 0.9922, 0.9922, 0.9922],
#          [0.0392, 0.0392, 0.0471, ..., 0.9843, 0.9882, 0.9922],
#          ...,
#          [0.1373, 0.1373, 0.1373, ..., 0.8392, 0.9098, 0.8745]]])

Tensor_data[0][1]
# 0

import matplotlib.pyplot as plt

plt.imshow(X=Tensor_data[0][0])
# TypeError: Invalid shape (3, 500, 394) for image data
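
matplotlib expects channels-last (H, W, C) image data, which is why the channels-first tensor above fails. A minimal sketch of one way to display it, assuming you keep the ToTensor() output:

plt.imshow(X=Tensor_data[0][0].permute(1, 2, 0))  # (3, 500, 394) -> (500, 394, 3)
plt.show()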

tt = ToTensor()

tt(PILImage_data[0][0])
# tensor([[[0.1451, 0.1373, 0.1412, ..., 0.9686, 0.9765, 0.9765],
#          [0.1373, 0.1373, 0.1451, ..., 0.9647, 0.9725, 0.9765],
#          ...,
#          [0.1098, 0.1098, 0.1059, ..., 0.2314, 0.2549, 0.2980]],
#         [[0.0784, 0.0706, 0.0745, ..., 0.9725, 0.9725, 0.9725],
#          [0.0706, 0.0706, 0.0784, ..., 0.9686, 0.9686, 0.9725],
#          ...,
#          [0.1059, 0.1059, 0.1059, ..., 0.3686, 0.4157, 0.4588]],
#         [[0.0471, 0.0392, 0.0431, ..., 0.9922, 0.9922, 0.9922],
#          [0.0392, 0.0392, 0.0471, ..., 0.9843, 0.9882, 0.9922],
#          ...,
#          [0.1373, 0.1373, 0.1373, ..., 0.8392, 0.9098, 0.8745]]])

tt(Image_data[0][0])
# Image([[[37, 35, 36, ..., 247, 249, 249],
#         [35, 35, 37, ..., 246, 248, 249],
#         ...,
#         [28, 28, 27, ...,  59,  65,  76]],
#        [[20, 18, 19, ..., 248, 248, 248],
#         [18, 18, 20, ..., 247, 247, 248],
#         ...,
#         [27, 27, 27, ...,  94, 106, 117]],
#        [[12, 10, 11, ..., 253, 253, 253],
#         [10, 10, 12, ..., 251, 252, 253],
#         ...,
#         [35, 35, 35, ..., 214, 232, 223]]], dtype=torch.uint8,)

plt.imshow(X=tt(PILImage_data[0][0]))
# TypeError: Invalid shape (3, 500, 394) for image data

plt.imshow(X=tt(Image_data[0][0]))
# TypeError: Invalid shape (3, 500, 394) for image data

tt(torch.tensor(2)) # int64
tt(torch.tensor(2, dtype=torch.int64))
# tensor(2)

tt(torch.tensor([0, 1, 2])) # int64
# tensor([0, 1, 2])

tt(torch.tensor([[0, 1, 2]])) # int64
# Image([[[0, 1, 2]]],)

tt(torch.tensor([[[0, 1, 2]]])) # int64
# tensor([[[0, 1, 2]]])

tt(torch.tensor([[[[0, 1, 2]]]])) # int64
# tensor([[[[0, 1, 2]]]])

tt(torch.tensor([[[[[0, 1, 2]]]]])) # int64
# tensor([[[[[0, 1, 2]]]]])

tt(torch.tensor([[0, 1, 2]], dtype=torch.int32))
# tensor([[0, 1, 2]], dtype=torch.int32)

tt(torch.tensor([[0, 1, 2]], dtype=torch.uint8))
# tensor([[0, 1, 2]], dtype=torch.uint8)

tt(torch.tensor([[0., 1., 2.]])) # float32
tt(torch.tensor([[0., 1., 2.]], dtype=torch.float32))
# tensor([[0., 1., 2.]])

tt(torch.tensor([[0., 1., 2.]], dtype=torch.float64))
# tensor([[0., 1., 2.]], dtype=torch.float64)

tt(torch.tensor([[0.+0.j, 1.+0.j, 2.+0.j]])) # complex64
tt(torch.tensor([[0.+0.j, 1.+0.j, 2.+0.j]], dtype=torch.complex64))
# tensor([[0.+0.j, 1.+0.j, 2.+0.j]])

tt(torch.tensor([[0.+0.j, 1.+0.j, 2.+0.j]], dtype=torch.complex32))
# tensor([[0.+0.j, 1.+0.j, 2.+0.j]], dtype=torch.complex32)

tt(torch.tensor([[True, False, True]])) # bool
tt(torch.tensor([[True, False, True]], dtype=torch.bool))
# tensor([[True, False, True]])
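
As the examples above suggest, a tensor input comes back with its values and dtype unchanged, i.e. it is not scaled even when it is uint8 (unlike a uint8 ndarray, shown below); the only visible change is that the 2D tensor was wrapped as an Image.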

tt(np.array([[0, 1, 2]])) # int32
tt(np.array([[0, 1, 2]], dtype=np.int32)) # int32
# tensor([[[0, 1, 2]]], dtype=torch.int32)

tt(np.array([[[0, 1, 2]]])) # int32
# tensor([[[0]], [[1]], [[2]]], dtype=torch.int32)
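
As the two results above suggest, ToTensor() treats a 2D ndarray as (H, W) and adds a channel dimension, while a 3D ndarray is treated as (H, W, C) and transposed to (C, H, W), which is why the (1, 1, 3) input came out with shape (3, 1, 1).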

tt(np.array([[0, 1, 2]], dtype=np.int64))
# tensor([[[0, 1, 2]]])

tt(np.array([[0, 1, 2.]], dtype=np.uint8))
# tensor([[[0.0000, 0.0039, 0.0078]]])
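# i.e. each uint8 value is divided by 255 (1/255 ≈ 0.0039, 2/255 ≈ 0.0078)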

tt(np.array([[0, 1, 2.]], dtype=np.uint16))
# tensor([[[0, 1, 2]]], dtype=torch.uint16)

tt(np.array([[0., 1., 2.]])) # float64
tt(np.array([[0., 1., 2.]], dtype=np.float64))
# tensor([[[0., 1., 2.]]], dtype=torch.float64)

tt(np.array([[0., 1., 2.]], dtype=np.float32))
# tensor([[[0., 1., 2.]]])

tt(np.array([[0.+0.j, 1.+0.j, 2.+0.j]])) # complex128
tt(np.array([[0.+0.j, 1.+0.j, 2.+0.j]], dtype=np.complex128))
# tensor([[[0.+0.j, 1.+0.j, 2.+0.j]]], dtype=torch.complex128)

tt(np.array([[0.+0.j, 1.+0.j, 2.+0.j]], dtype=np.complex64))
# tensor([[[0.+0.j, 1.+0.j, 2.+0.j]]])

tt(np.array([[True, False, True]])) # bool
tt(np.array([[True, False, True]], dtype=bool))
# tensor([[[True, False, True]]])