arc-pytorch icon indicating copy to clipboard operation
arc-pytorch copied to clipboard

Scaling up

Open phobrain opened this issue 8 years ago • 1 comments

First issue in scaling up:

Looks like one needs to analyze all the photos to train with, to get preprocess() factors like in this example:

http://blog.outcome.io/pytorch-quick-start-classifying-an-image/

It seems someone would have written a program by now that takes a set of images and outputs the numbers for

# Example from the linked blog post: the normalization constants must be
# computed from the training images (these look like the usual ImageNet
# statistics -- TODO confirm against the post).
normalize = transforms.Normalize(
   mean=[0.485, 0.456, 0.406],
   std=[0.229, 0.224, 0.225]
)
# Resize the short side to 256, center-crop 224x224, convert to a CxHxW
# float tensor in [0, 1], then standardize per channel.
# NOTE(review): transforms.Scale was renamed transforms.Resize in newer
# torchvision releases.
preprocess = transforms.Compose([
   transforms.Scale(256),
   transforms.CenterCrop(224),
   transforms.ToTensor(),
   normalize
])

Here's a stab at it:

import os

import numpy as np
from PIL import Image

# Two-pass computation of per-channel mean and standard deviation over a
# set of images, for use with torchvision transforms.Normalize.
#
# Fixes vs. the original paste:
#  * Python 2 syntax (`except Exception, e:`, print statements) -> Python 3.
#  * `im.close()` in pass 2 was mis-indented (IndentationError).
#  * '~' is expanded with os.path.expanduser -- PIL does not expand it.
#  * .convert('RGB') guarantees 3-channel pixels, so grayscale/palette
#    images no longer crash the per-channel accumulation.
#  * The per-pixel getpixel() loops are replaced with vectorized numpy
#    sums, which is orders of magnitude faster on large collections.

pic_dir = os.path.expanduser('~/images/prun299')
fileList = pic_dir + '/files'  # one image filename per line

# Pass 1: per-channel pixel sums -> mean (still on the 0..255 scale).
pixCount = 0
RGB = np.zeros(3)

with open(fileList) as fp:
    for line in fp:
        file = os.path.join(pic_dir, line.rstrip())
        try:
            im = Image.open(file).convert('RGB')
        except Exception as e:
            print("None: %s %s" % (file, str(e)))
            continue

        arr = np.asarray(im, dtype=np.float64)   # H x W x 3
        RGB += arr.sum(axis=(0, 1))
        pixCount += arr.shape[0] * arr.shape[1]
        im.close()

RGB /= pixCount

# Pass 2: summed squared deviations from the pass-1 mean -> std dev.
DEV = np.zeros(3)

print('pass 2')

with open(fileList) as fp:
    for line in fp:
        file = os.path.join(pic_dir, line.rstrip())
        try:
            im = Image.open(file).convert('RGB')
        except Exception:
            # Unreadable files were already reported in pass 1.
            continue

        arr = np.asarray(im, dtype=np.float64)
        DEV += ((arr - RGB) ** 2).sum(axis=(0, 1))
        im.close()

DEV = np.sqrt(DEV / pixCount)

# Rescale to [0, 1] to match what transforms.ToTensor() feeds Normalize.
RGB /= 255
DEV /= 255

print('mean=[' + str(RGB[0]) + ', ' + str(RGB[1]) + ', ' + str(RGB[2]) + '],')
print('std=[' + str(DEV[0]) + ', ' + str(DEV[1]) + ', ' + str(DEV[2]) + ']')

#  6764 files:
# mean=[0.3876046197, 0.3751385941, 0.3667266388],
# std=[0.2649736267, 0.2584158245, 0.2701408752]

This results in the following initial loader; the Keras version works, but this PyTorch one is untested.

import os

from PIL import Image
from torchvision import models, transforms

# '~' must be expanded explicitly -- open()/PIL do not do it.
pair_dir = os.path.expanduser('~/pb')
pic_dir = os.path.expanduser('~/images/prun299')
image_size = 299  # Inception v3 input size

# Per-channel statistics computed over the training images (see the
# two-pass script above).
normalize = transforms.Normalize(
 mean=[0.3876046197, 0.3751385941, 0.3667266388],
 std=[0.2649736267, 0.2584158245, 0.2701408752]
)

# BUG FIX: the original resized the short side to 256 and then tried to
# center-crop 299x299 -- a crop larger than the image.  Resize to a size
# comfortably larger than the crop instead; 342 = round(299 / 0.875) is
# the conventional margin for Inception-v3 preprocessing.
# NOTE(review): transforms.Scale is the old name; newer torchvision
# releases call it transforms.Resize.
preprocess = transforms.Compose([
   transforms.Scale(342),
   transforms.CenterCrop(image_size),
   transforms.ToTensor(),
   normalize
])

# filename -> preprocessed 1xCxHxW tensor, filled lazily by scan_file().
file_map = {}

def load_preproc():
    """Preload and cache preprocessed tensors for every pair file."""
    print('Loading pics')
    for split in ('test.neg', 'test.pos', 'train.pos', 'train.neg'):
        scan_file(pair_dir + '/' + split)

def scan_file(fname):
    """Load and cache the preprocessed tensor for each image in a pair file.

    Each line of *fname* names two image files.  Tensors are stored in the
    module-level file_map keyed by filename; names already present in the
    cache are counted but not reloaded.
    """
    print('Scan file: ' + fname)
    ct = 0    # newly loaded
    ct2 = 0   # cache hits
    with open(fname) as fp:
        for line in fp:
            # BUG FIX: the original tested `type(file_map.get(...)) is
            # NoneType`, but NoneType is an undefined name (NameError).
            # A plain membership test is both correct and idiomatic, and
            # one loop replaces the duplicated fname1/fname2 branches.
            for name in line.split():
                if name in file_map:
                    ct2 += 1
                    continue
                ct += 1
                img_pil = Image.open(pic_dir + '/' + name)
                img_tensor = preprocess(img_pil)
                img_tensor.unsqueeze_(0)  # add batch dim: CxHxW -> 1xCxHxW
                file_map[name] = img_tensor
    print('    loaded: ' + str(ct) + ' already loaded: ' + str(ct2))

Here's how the keras file-load-preproc portion looks:

from keras.preprocessing import image
from keras.applications.inception_v3 import preprocess_input

                im1 = image.load_img(pic_dir + '/' + fname1, target_size=input_dim)
                x = image.img_to_array(im1)
                x = np.expand_dims(x, axis=0)
                x = preprocess_input(x)[0]
                file_map[fname1] = x

phobrain avatar Sep 04 '17 00:09 phobrain

Finding that a 1080 ti is limited to training set size of <50 pairs of 299x299x3 pics with a Siamese Inception v3, and given that Inception v3 was trained on batches of 1600 pics distributed 32 to a GPU, it looks like it would require a grant to put the machinery together to do what I want, or a team of collaborators willing to pitch in. Pairs might benefit from larger batches than single pics? In any case, the ideal case for exploration now seems to be using greyscale histograms (150 dimensions), scaling up to rgb 32x32x32 histograms.

phobrain avatar Sep 26 '17 10:09 phobrain