My changes for dreamify.py
Hi,
I spent some time and rewrote dreamify.py a bit. I moved the model data out into a json file.
dreamify.py:

```python
#!/usr/bin/env python
# imports and basic notebook setup
import numpy as np
import scipy.ndimage as nd
import PIL.Image
import json
import os
import sys
from IPython.display import clear_output
from google.protobuf import text_format
from optparse import OptionParser
import caffe


def savearray(a, filename, fmt='png'):
    a = np.uint8(np.clip(a, 0, 255))
    with open(filename, 'wb') as f:
        PIL.Image.fromarray(a).save(f, fmt)


# a couple of utility functions for converting to and from Caffe's input image layout
def preprocess(net, img):
    return np.float32(np.rollaxis(img, 2)[::-1]) - net.transformer.mean['data']


def deprocess(net, img):
    return np.dstack((img + net.transformer.mean['data'])[::-1])


# http://stackoverflow.com/questions/956867/how-to-get-string-objects-instead-of-unicode-ones-from-json-in-python
def byteify(input):
    if isinstance(input, dict):
        return {byteify(key): byteify(value) for key, value in input.iteritems()}
    elif isinstance(input, list):
        return [byteify(element) for element in input]
    elif isinstance(input, unicode):
        return input.encode('utf-8')
    else:
        return input


class DeepDreamer:
    def __init__(self):
        self.model_path = ''
        self.net_fn = ''
        self.param_fn = ''
        self.base_img = None
        self.image_mean = []
        self.end = ''
        self.jitter = 32
        self.step_size = 1.5

    def load_model_json(self, model_json_path):
        model_data = None
        if os.path.exists(model_json_path):
            with open(model_json_path) as model_json_file:
                model_data = json.load(model_json_file)
        if model_data is not None:
            self.model_path = byteify(model_data['model_path'])
            self.net_fn = byteify(os.path.join(self.model_path, model_data['prototxt']))
            self.param_fn = byteify(os.path.join(self.model_path, model_data['caffemodel']))
            self.image_mean = model_data['image_mean']
            self.end = byteify(model_data['end'])

    def load_model(self, model_path):
        self.load_model_json(model_path)
        # Patching model to be able to compute gradients.
        # Note that you can also manually add "force_backward: true" line to "deploy.prototxt".
        self.model = caffe.io.caffe_pb2.NetParameter()
        txt_data = ''
        with open(self.net_fn) as net_fn_file:
            txt_data = net_fn_file.read()
        text_format.Merge(txt_data, self.model)
        self.model.force_backward = True
        with open('tmp.prototxt', 'w') as tmp_file:
            tmp_file.write(str(self.model))
        self.net = caffe.Classifier('tmp.prototxt', self.param_fn,
                                    mean=np.float32(self.image_mean),  # image mean from the json file, training set dependent
                                    channel_swap=(2, 1, 0))  # the reference model has channels in BGR order instead of RGB

    def load_img(self, image_path):
        self.base_img = np.float32(PIL.Image.open(image_path))

    def make_step(self):
        '''Basic gradient ascent step.'''
        src = self.net.blobs['data']  # input image is stored in Net's 'data' blob
        dst = self.net.blobs[self.end]
        ox, oy = np.random.randint(-self.jitter, self.jitter + 1, 2)
        src.data[0] = np.roll(np.roll(src.data[0], ox, -1), oy, -2)  # apply jitter shift
        self.net.forward(end=self.end)
        dst.diff[:] = dst.data  # specify the optimization objective
        self.net.backward(start=self.end)
        g = src.diff[0]
        # apply normalized ascent step to the input image
        src.data[:] += self.step_size / np.abs(g).mean() * g
        src.data[0] = np.roll(np.roll(src.data[0], -ox, -1), -oy, -2)  # unshift image
        if self.clip:
            bias = self.net.transformer.mean['data']
            src.data[:] = np.clip(src.data, -bias, 255 - bias)

    def deep_dream(self, iter_n=10, octave_n=4, octave_scale=1.4,
                   clip=True, step_size=1.5, jitter=32):
        self.step_size = step_size
        self.jitter = jitter
        self.clip = clip
        # prepare base images for all octaves
        octaves = [preprocess(self.net, self.base_img)]
        for i in xrange(octave_n - 1):
            octaves.append(nd.zoom(octaves[-1], (1, 1.0 / octave_scale, 1.0 / octave_scale), order=1))
        src = self.net.blobs['data']
        detail = np.zeros_like(octaves[-1])  # allocate image for network-produced details
        for octave, octave_base in enumerate(octaves[::-1]):
            h, w = octave_base.shape[-2:]
            if octave > 0:
                # upscale details from the previous octave
                h1, w1 = detail.shape[-2:]
                detail = nd.zoom(detail, (1, 1.0 * h / h1, 1.0 * w / w1), order=1)
            src.reshape(1, 3, h, w)  # resize the network's input image size
            src.data[0] = octave_base + detail
            for i in xrange(iter_n):
                self.make_step()
                # visualization
                vis = deprocess(self.net, src.data[0])
                if not clip:  # adjust image contrast if clipping is disabled
                    vis = vis * (255.0 / np.percentile(vis, 99.98))
                print octave, i, self.end, vis.shape
                clear_output(wait=True)
            # extract details produced on the current octave
            detail = src.data[0] - octave_base
        # return the resulting image
        return deprocess(self.net, src.data[0])

    def __str__(self):
        return ','.join(self.net.blobs.keys())


def main():
    usage = '%prog [options] image [image ...]'
    parser = OptionParser(usage=usage)
    parser.add_option('-m', '--model', action='store', dest='model',
                      default='', help='The model to use')
    parser.add_option('-i', '--iterations', action='store',
                      dest='iterations', default='', help='The number of iterations')
    parser.add_option('-j', '--jitter', action='store', dest='jitter',
                      default='32', help='The amount of jitter')
    parser.add_option('-s', '--stepsize', action='store', dest='stepsize',
                      default='1.5', help='The step size')
    parser.add_option('-b', '--blobs', action='store_true', dest='show_blobs',
                      default=False, help='Show the blob keys and exit')
    options, args = parser.parse_args()
    if len(args) == 0:
        print 'You must specify images to process as arguments'
        sys.exit(-1)
    if not os.path.exists(options.model):
        print 'The model at %s was not found. Please specify a valid model.' % options.model
        sys.exit(-1)
    iterations = int(options.iterations, 10)
    jitter = int(options.jitter)
    step_size = float(options.stepsize)
    dreamer = DeepDreamer()
    dreamer.load_model(options.model)
    if options.show_blobs:
        print str(dreamer)
        sys.exit(0)
    for image_path in args:
        dreamer.load_img(image_path)
        image_dir = os.path.dirname(image_path)
        image_name = os.path.splitext(os.path.basename(image_path))[0]
        output_file_name = os.path.join(image_dir, image_name + '_dream.png')
        dream_array = dreamer.deep_dream(iter_n=iterations, jitter=jitter, step_size=step_size)
        savearray(dream_array, output_file_name)


if __name__ == '__main__':
    main()
```
And an example json:
```json
{
    "model_path": "/home/vagrant/caffe/models/bvlc_googlenet/",
    "prototxt": "deploy.prototxt",
    "caffemodel": "bvlc_googlenet.caffemodel",
    "image_mean": [104.0, 116.0, 122.0],
    "end": "inception_4c/output"
}
```
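For reference, the class can also be driven straight from Python. A minimal sketch, assuming the json above is saved as `googlenet.json` and you have an input image `sky.jpg` (both filenames are placeholders):

```python
dreamer = DeepDreamer()
dreamer.load_model('googlenet.json')  # reads the json, patches the prototxt, builds the net
dreamer.load_img('sky.jpg')
result = dreamer.deep_dream(iter_n=10, jitter=32, step_size=1.5)
savearray(result, 'sky_dream.png')
```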
Hmm, have you tried different caffemodels? I tried, but failed miserably...
Yes I have. The real secret is that for the end parameter you need to specify the name of the layer from the blob list that you want to amplify. The "higher" (deeper) layers tend to produce more complex imagery. It might take some trial and error to find the right layers to use. Also, you will (potentially) need a different image mean.
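Dumping the candidate layer names looks roughly like this (a sketch; `googlenet.json` is a placeholder for your own model json, and the script's -b option does the same thing from the command line):

```python
dreamer = DeepDreamer()
dreamer.load_model('googlenet.json')  # placeholder path
for name in dreamer.net.blobs.keys():
    print name  # candidate values for "end", e.g. inception_4c/output
```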
What is odd is that if you grep for "mean" in the train_val.prototxt that ships with bvlc_googlenet, you get the numbers 104, 117, 123... but the mean used in dreamify.py is 104, 116, 122.
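You can check the numbers yourself; here is one way (a sketch, assuming the stock ImageNet mean file that ships in Caffe's python tree):

```python
import numpy as np
# per-channel mean of the ILSVRC 2012 mean image, BGR order
mean_blob = np.load('python/caffe/imagenet/ilsvrc_2012_mean.npy')
print mean_blob.mean(1).mean(1)  # roughly [104.0, 116.7, 122.7]
```

That rounds to 104, 117, 123, so the 104, 116, 122 in dreamify.py looks like it was simply rounded down instead of to the nearest integer.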
Also, other models seem to ship the mean as something like "places_mean.mat". From my research it seems this needs to be loaded roughly like so (see https://github.com/BVLC/caffe/issues/1936):

```python
net = caffe.Classifier(MODEL_FILE, PRETRAINED)
net.set_raw_scale('data', 255)
net.set_channel_swap('data', (2, 1, 0))
net.set_mean('data', np.load(caffe_root + 'python/caffe/imagenet/ilsvrc_2012_mean.npy'))
```
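For the .mat case, something like this should get the mean into numpy first (a sketch; 'image_mean' is an assumed variable name inside the file, so inspect it with whosmat before relying on it):

```python
import scipy.io
print scipy.io.whosmat('places_mean.mat')  # list the variables stored in the file
mat = scipy.io.loadmat('places_mean.mat')
mean = mat['image_mean']  # assumed key; adjust to whatever whosmat reports
```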
Here is a sample json that works with the googlenet_places205 model:

```json
{
    "model_path": "/home/vagrant/caffe/models/googlenet_places205/",
    "prototxt": "deploy_places205.protxt",
    "caffemodel": "googlelet_places205_train_iter_2400000.caffemodel",
    "image_mean": [104.0, 116.0, 122.0],
    "end": "inception_4d/output"
}
```
You probably want to fork this repo (in the GitHub sense), commit your changes (in a more atomic fashion, rather than one huge commit), and then drop a pointer to your repo here, or maybe open another issue with Dhar to have him link to your repo from the Readme. You seem to have some really good insights into what the code actually does :+1:
Also, you could track your own issues there ;)
The setters from your example above all yield `'Classifier' object has no attribute 'set_foo'` when used on the bvlc_googlenet model. Is that expected?
Hello Tildebyte,
The error you are seeing is not expected. I do not see that error on my end, so I find it quite odd.
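One guess (an assumption on my part, not verified against your setup): in newer Caffe builds the preprocessing setters were moved off Classifier onto its Transformer, so the calls would become:

```python
net.transformer.set_raw_scale('data', 255)
net.transformer.set_channel_swap('data', (2, 1, 0))
net.transformer.set_mean('data', mean.mean(1).mean(1))  # collapse to a per-channel mean
```

If that is what you are hitting, the snippet from the Caffe issue above is just written against an older API.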
Creating a repo is not a bad idea, but I do not really have time to maintain anything at the moment. I just posted the code in case there were any useful bits or ideas that could be gained from it.
Thanks for the input, @donaldm and @tildebyte! I just got back from a vacation without internet, so it might take me a bit to catch up and check this out. I haven't forgotten you! :)
Two suggestions:
- Set a default value for iterations. If it is not set from the command line, I get an error when the script tries to `int('')`:

```python
parser.add_option('-i', '--iterations', action='store',
                  dest='iterations', default='10', help='The number of iterations')
```

- If Caffe was built with CUDA support for an NVIDIA GPU, these lines will speed things up a LOT (a flag-gated variant is sketched after this list):

```python
# If your GPU supports CUDA and Caffe was built with CUDA support,
# uncomment the following to run Caffe operations on the GPU.
caffe.set_mode_gpu()
caffe.set_device(0)  # select GPU device if multiple devices exist
```
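To keep the script runnable on CPU-only machines, one option is to gate GPU mode behind a flag (a sketch; the -g/--gpu option name is my invention, not something in the script today):

```python
parser.add_option('-g', '--gpu', action='store_true', dest='use_gpu',
                  default=False, help='Run Caffe operations on the GPU')
options, args = parser.parse_args()
if options.use_gpu:
    caffe.set_mode_gpu()
    caffe.set_device(0)  # select GPU device if multiple devices exist
else:
    caffe.set_mode_cpu()
```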