litgpt
litgpt copied to clipboard
I'm doing an image generation experiment. My script outputs a JSON file; how do I train a Transformer model to generate a pixel representation of an image?
I'm doing an image generation experiment. My script outputs a JSON file; how do I train a Transformer model to generate a pixel representation of an image?
import cv2
import json
import numpy as np
import os
from PIL import Image
def image_to_text(image_path, text_path):
    """Serialize an image's pixels and contours to a JSON text file.

    The image is loaded with ``cv2.imread`` (so pixel values are stored in
    OpenCV's BGR channel order), thresholded to a binary mask, and its
    contours are extracted. The JSON object written to ``text_path`` has
    the keys ``mode`` (channel count), ``size`` (``[height, width]``),
    ``pixels`` (flat list of pixel values) and ``contours`` (one flat
    ``[x0, y0, x1, y1, ...]`` list per contour).

    Args:
        image_path: Path of the image file to read.
        text_path: Path of the JSON text file to write.

    Raises:
        ValueError: If the image cannot be read or decoded.
    """
    image = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising.
    if image is None:
        raise ValueError(f"Could not read image: {image_path}")

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # With THRESH_OTSU the threshold is chosen automatically; the 128
    # passed here is ignored by OpenCV.
    _, binary = cv2.threshold(
        gray, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU
    )

    # findContours returns (contours, hierarchy) on OpenCV 4.x but
    # (image, contours, hierarchy) on 3.x; [-2] selects contours on both.
    contours = cv2.findContours(
        binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
    )[-2]
    contours_list = [contour.flatten().tolist() for contour in contours]

    pixels = image.flatten().tolist()

    # Channel count when the array is 3-D, otherwise a single channel.
    mode = image.shape[2] if image.ndim == 3 else 1
    size = list(image.shape[:2])

    with open(text_path, 'w', encoding='utf-8') as text_file:
        json.dump(
            {
                'mode': mode,
                'size': size,
                'pixels': pixels,
                'contours': contours_list,
            },
            text_file,
        )
def text_to_image(text_path, output_image_path):
    """Rebuild an image from a JSON text file and draw its contours.

    Reads the ``mode``, ``size``, ``pixels`` and ``contours`` keys from the
    JSON file at ``text_path``, reshapes the pixels into an image array,
    draws every contour in green with a thickness of 2, and writes the
    result to ``output_image_path``.

    The pixel data is treated as BGR and written with OpenCV directly.
    Routing it through ``PIL.Image.fromarray`` would interpret the same
    bytes as RGB and swap the red and blue channels in the output.

    Args:
        text_path: Path of the JSON text file to read.
        output_image_path: Path of the image file to write.

    Raises:
        OSError: If OpenCV fails to write the output image.
    """
    with open(text_path, 'r', encoding='utf-8') as text_file:
        data = json.load(text_file)

    mode = data['mode']
    height, width = data['size']

    image_array = np.array(data['pixels'], dtype=np.uint8)
    if mode == 1:
        # Promote grayscale to 3 channels so the green contours are visible.
        image_array = cv2.cvtColor(
            image_array.reshape(height, width), cv2.COLOR_GRAY2BGR
        )
    else:
        image_array = image_array.reshape(height, width, mode)

    # drawContours requires 32-bit integer points; the platform default
    # integer dtype may be int64, which OpenCV rejects.
    contours = [
        np.array(contour, dtype=np.int32).reshape(-1, 1, 2)
        for contour in data['contours']
    ]
    if contours:
        cv2.drawContours(image_array, contours, -1, (0, 255, 0), 2)

    # cv2.imwrite reports failure through its return value.
    if not cv2.imwrite(output_image_path, image_array):
        raise OSError(f"Could not write image: {output_image_path}")
def batch_process(input_folder, output_folder_text, output_folder_images):
    """Convert every image in a folder to text and back to an image.

    For each regular file in ``input_folder`` whose name ends with
    ``.jpg``, ``.png`` or ``.jpeg`` (case-insensitive), writes
    ``<base name>.txt`` into ``output_folder_text`` via ``image_to_text``
    and then reconstructs the image under the original file name in
    ``output_folder_images`` via ``text_to_image``.

    Args:
        input_folder: Folder containing the source images.
        output_folder_text: Folder for the text files; created if missing.
        output_folder_images: Folder for the rebuilt images; created if
            missing.
    """
    # exist_ok avoids the check-then-create race of os.path.exists().
    os.makedirs(output_folder_text, exist_ok=True)
    os.makedirs(output_folder_images, exist_ok=True)

    for filename in os.listdir(input_folder):
        # Only handle common image formats.
        if not filename.lower().endswith(('.jpg', '.png', '.jpeg')):
            continue

        image_path = os.path.join(input_folder, filename)
        # Skip directories (or other non-files) that merely look like images.
        if not os.path.isfile(image_path):
            continue

        print(f"Processing {filename}...")
        base_filename = os.path.splitext(filename)[0]
        text_path = os.path.join(output_folder_text, base_filename + '.txt')
        output_image_path = os.path.join(output_folder_images, filename)

        # Image -> text
        image_to_text(image_path, text_path)
        # Text -> image
        text_to_image(text_path, output_image_path)
# Usage example: round-trip every image in the input folder through text.
input_folder = 'D:/llama2.c-master/1/images'
output_folder_text = 'D:/llama2.c-master/1/text'
output_folder_images = 'D:/llama2.c-master/1/imagesout'
batch_process(
    input_folder=input_folder,
    output_folder_text=output_folder_text,
    output_folder_images=output_folder_images,
)
@carmocca Can you help?
I'm sorry but I have no idea about what you are talking about :(
I'm sorry but I have no idea about what you are talking about :(
I'm sorry that my wording was unclear. This is my script. I'm experimenting with image generation: can I train a Transformer model to generate a pixel representation of an image?