litgpt icon indicating copy to clipboard operation
litgpt copied to clipboard

I'm doing an image generation experiment, and my script outputs a JSON file. How do I train a Transformer model to generate a pixel representation of an image?

Open win10ogod opened this issue 1 year ago • 2 comments

I'm doing an image generation experiment, and my script outputs a JSON file. How do I train a Transformer model to generate a pixel representation of an image?

import cv2
import json
import numpy as np
import os
from PIL import Image

def image_to_text(image_path, text_path):
    """Serialize an image's pixels and contours to a JSON text file.

    The image is loaded with OpenCV, converted to grayscale, binarized, and
    its contours are extracted. A JSON object with four keys is written:

    * ``mode``: number of channels (``image.shape[2]``), or 1 for a 2-D array.
    * ``size``: ``[height, width]`` taken from ``image.shape[:2]``.
    * ``pixels``: the flattened pixel array, in the channel order OpenCV
      loaded it (BGR for colour images), not RGB.
    * ``contours``: one flat list of point coordinates per contour.

    Args:
        image_path: Path of the image file to read.
        text_path: Path of the JSON file to write.

    Raises:
        OSError: If OpenCV cannot read or decode ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with the offending path instead of deep inside cvtColor.
        raise OSError(f"Could not read image: {image_path}")
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # With THRESH_OTSU set, OpenCV picks the threshold itself; the 128 passed
    # here is only a placeholder argument.
    _, binary = cv2.threshold(gray, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

    # findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
    # (image, contours, hierarchy) in 3.x; index -2 is the contours in both.
    contours = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
    contours_list = [contour.flatten().tolist() for contour in contours]

    # Flatten the original (not the binarized) image into a plain list
    pixels = image.flatten().tolist()

    # Channel count: 3-D arrays carry it in shape[2]; a 2-D array is grayscale
    mode = image.shape[2] if len(image.shape) == 3 else 1
    size = image.shape[:2]  # (height, width)

    with open(text_path, 'w') as text_file:
        json.dump({'mode': mode, 'size': size, 'pixels': pixels, 'contours': contours_list}, text_file)

def text_to_image(text_path, output_image_path):
    """Rebuild an image from a JSON dump and save it with its contours drawn.

    Reads the ``mode``, ``size``, ``pixels`` and ``contours`` keys written by
    ``image_to_text``, reshapes the pixels back into an image array, draws
    every contour in green (2 px thick) and writes the result to disk.

    The pixel data is in OpenCV's BGR channel order, so the image is written
    with OpenCV as well. Routing it through PIL would interpret the channels
    as RGB and swap red and blue in the output.

    Args:
        text_path: Path of the JSON file to read.
        output_image_path: Path of the image file to write; the extension
            selects the encoder.

    Raises:
        ValueError: If the pixel count does not match ``size`` and ``mode``.
        OSError: If OpenCV fails to write ``output_image_path``.
    """
    with open(text_path, 'r') as text_file:
        data = json.load(text_file)

    mode = data['mode']
    height, width = data['size']

    image_array = np.array(data['pixels'], dtype=np.uint8)
    if mode == 1:
        # Grayscale: expand to three channels so the green contours are visible
        canvas = cv2.cvtColor(image_array.reshape(height, width), cv2.COLOR_GRAY2BGR)
    else:
        canvas = image_array.reshape(height, width, mode)

    # OpenCV expects each contour as an (N, 1, 2) int32 array; NumPy's default
    # integer is int64 on most non-Windows platforms, which drawContours rejects.
    contours = [
        np.array(contour, dtype=np.int32).reshape(-1, 1, 2)
        for contour in data['contours']
    ]
    cv2.drawContours(canvas, contours, -1, (0, 255, 0), 2)

    # Encode once; imwrite returns False on failure instead of raising
    if not cv2.imwrite(output_image_path, canvas):
        raise OSError(f"Could not write image: {output_image_path}")


def batch_process(input_folder, output_folder_text, output_folder_images):
    """Round-trip every image in a folder through the text representation.

    For each ``.jpg``, ``.png`` or ``.jpeg`` file (case-insensitive) found
    directly in ``input_folder``, writes ``<name>.txt`` into
    ``output_folder_text`` via ``image_to_text`` and then rebuilds the image
    into ``output_folder_images`` via ``text_to_image``, keeping the original
    file name. Both output folders are created if they do not exist.

    Args:
        input_folder: Folder containing the source images.
        output_folder_text: Folder receiving the generated text files.
        output_folder_images: Folder receiving the reconstructed images.
    """
    # Make sure both output folders exist
    for folder in (output_folder_text, output_folder_images):
        if not os.path.exists(folder):
            os.makedirs(folder)

    image_suffixes = ('.jpg', '.png', '.jpeg')  # common image formats

    # Walk every entry in the input folder, skipping non-image files
    for filename in os.listdir(input_folder):
        if not filename.lower().endswith(image_suffixes):
            continue

        print(f"Processing {filename}...")
        stem, _extension = os.path.splitext(filename)
        source_path = os.path.join(input_folder, filename)
        dump_path = os.path.join(output_folder_text, stem + '.txt')
        rebuilt_path = os.path.join(output_folder_images, filename)

        # Image -> text, then text -> image
        image_to_text(source_path, dump_path)
        text_to_image(dump_path, rebuilt_path)

# Usage example: these paths are hard-coded for the author's machine and
# must be edited before running elsewhere.
input_folder = 'D:/llama2.c-master/1/images'
output_folder_text = 'D:/llama2.c-master/1/text'
output_folder_images = 'D:/llama2.c-master/1/imagesout'

# Executed at module level: runs the whole batch as soon as the script is loaded.
batch_process(input_folder, output_folder_text, output_folder_images)

@carmocca Can you help?

win10ogod avatar Feb 23 '24 01:02 win10ogod

I'm sorry but I have no idea about what you are talking about :(

carmocca avatar Feb 23 '24 16:02 carmocca

I'm sorry but I have no idea about what you are talking about :(

I'm sorry that my wording was unclear. This is my script: I'm doing an image generation experiment. Can I train a Transformer model to generate a pixel representation of an image?

win10ogod avatar Feb 24 '24 02:02 win10ogod