# HelmholtzMachineDemo.py
# 6/12/2015
# Kevin Kirby, Northern Kentucky University

"""
A basic Python implementation/demonstration of a Helmholtz Machine.
A companion to my 2006 tutorial.

@author: Kevin Kirby
"""

import numpy as np

# Some utility functions

def sigmoid(u):
    """ A vectorized version of the logistic sigmoid firing function. """
    return 1/(1 + np.exp(np.negative(u)))

def bernoulliSample( probs ):
    """ Generate a binary vector according to a vector of probabilities. """
    return [ np.random.choice( (1.0,0.0), p=(p,1-p) ) for p in probs ]

def intToPattern( i, n ):
    """ A length-n tuple of 0.,1.'s corresponding to the binary form of i. """
    return tuple( map(float, bin(i)[2:].zfill(n)) )

def patternToString( d, clumpsize ):
    """ For pretty-printing a real binary pattern list in space-separated clumps. """
    s= ""
    for k in range( len(d) ):
        if k%clumpsize == 0:
            s+= " "
        s += "1" if d[k]>0 else "0"
    return s

# A toy example of a world of tiny fluctuating images.
vhWorld= {
    # Vertically striped 3x3 images occur with probability 2/18 each.
    (1,0,0, 1,0,0, 1,0,0) : 2/18.,
    (0,1,0, 0,1,0, 0,1,0) : 2/18.,
    (0,0,1, 0,0,1, 0,0,1) : 2/18.,
    (1,1,0, 1,1,0, 1,1,0) : 2/18.,
    (1,0,1, 1,0,1, 1,0,1) : 2/18.,
    (0,1,1, 0,1,1, 0,1,1) : 2/18.,
    # Horizontally striped 3x3 images occur with probability 1/18 each.
    (1,1,1, 0,0,0, 0,0,0) : 1/18.,
    (0,0,0, 1,1,1, 0,0,0) : 1/18.,
    (0,0,0, 0,0,0, 1,1,1) : 1/18.,
    (1,1,1, 1,1,1, 0,0,0) : 1/18.,
    (1,1,1, 0,0,0, 1,1,1) : 1/18.,
    (0,0,0, 1,1,1, 1,1,1) : 1/18.
    # The remaining 500 (of 512 possible) 3x3 images occur with probability 0.
}

def sampleFromWorld( world ):
    """ Return a random pattern from world according to its associated probability. """
    index= np.random.choice( len( world ), p=list( world.values() ) )
    return list( list( world.keys() )[index] )

# A basic Helmholtz machine class.
class HelmholtzMachine:

    def __init__( self, nTop, nMid, nBottom ):
        """ Construct a 3-layer HM with the given number of neurons at each level. """
        # Dimension of pattern vector visible at the bottom.
        self.dimension= nBottom
        # Generative weights (top down); the extra column in each matrix is a bias.
        self.bG= np.zeros( nTop )
        self.wG= np.zeros( [nMid, nTop+1] )
        self.vG= np.zeros( [nBottom, nMid+1] )
        # Recognition weights (bottom up)
        self.vR= np.zeros( [nMid, nBottom+1] )
        self.wR= np.zeros( [nTop, nMid+1] )
        # Some default learning rates
        self.eps= 0.01        # default learning rate
        self.epsBottom= 0.15  # special learning rate for bottom layer

    def generate( self ):
        """ Generate (fantasize) a pattern d. """
        x= bernoulliSample( sigmoid( self.bG ) )
        y= bernoulliSample( sigmoid( np.dot(self.wG, x+[1]) ) )
        d= bernoulliSample( sigmoid( np.dot(self.vG, y+[1]) ) )
        return d

    def wake( self, d ):
        """ One wake learning cycle, given pattern d applied at the bottom. """
        # Upward (recognition) pass.
        y= bernoulliSample( sigmoid( np.dot(self.vR, d+[1]) ) )
        x= bernoulliSample( sigmoid( np.dot(self.wR, y+[1]) ) )
        # Downward (generation) pass.
        px= sigmoid( self.bG )
        py= sigmoid( np.dot(self.wG, x+[1]) )
        pd= sigmoid( np.dot(self.vG, y+[1]) )
        # Adjust generative weights by the delta rule.
        self.bG += self.eps * ( x - px )
        self.wG += self.eps * np.outer( y - py, x+[1] )
        self.vG += self.epsBottom * np.outer( d - pd, y+[1] )

    def sleep( self ):
        """ One sleep learning cycle. """
        # Initiate a dream!
        x= bernoulliSample( sigmoid( self.bG ) )
        # Pass dream signal downward to generate a pattern.
        y= bernoulliSample( sigmoid( np.dot(self.wG, x+[1]) ) )
        d= bernoulliSample( sigmoid( np.dot(self.vG, y+[1]) ) )
        # Pass back up through recognition network, saving computed probabilities.
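        # Note the symmetry with wake(): here the dreamed activities x, y, d are
        # treated as targets, and the recognition probabilities computed next are
        # the predictions to be corrected.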
        py= sigmoid( np.dot(self.vR, d+[1]) )
        px= sigmoid( np.dot(self.wR, y+[1]) )
        # Adjust recognition weights by the delta rule.
        self.vR += self.epsBottom * np.outer( y - py, d+[1] )
        self.wR += self.eps * np.outer( x - px, y+[1] )

    def learn( self, world, nCycles ):
        """ Run for a given number of wake-sleep cycles to learn this world. """
        for t in range( nCycles ):
            d= sampleFromWorld( world )
            self.wake( d )
            self.sleep()

    def estimateModel( self, nSamples ):
        """ Generate many samples in order to estimate pattern probabilities.
            probG[d] will be the estimated probability that the HM generates pattern d. """
        dp= 1.0 / nSamples
        probG= dict()
        for count in range( nSamples ):
            d= tuple( self.generate() )
            if d in probG:
                probG[ d ] += dp
            else:
                probG[ d ] = dp
        return probG

def reportModelComparison( world, machine ):
    """ Print a table that shows how closely the machine models the world. """
    # Get a table of (d, pG(d)) by doing a lot of sampling from the machine.
    probG= machine.estimateModel( 100000 )
    # Loop through all the patterns in the world.
    dim= machine.dimension
    assert dim <= 16  # This report is only good for "toy" (low-dimension) examples.
    klSum= 0    # for accumulating the KL divergence
    plist= []   # to hold a list of (d, p(d), pG(d)) to print as a summary
    for i in range( 2**dim ):
        d= intToPattern( i, dim )
        p= world[d] if d in world else 0     # probability the world shows d
        pG= probG[d] if d in probG else 0    # probability the machine generates d
        if (p > 0) and (pG > 0):
            klSum += p * np.log( p / pG )    # (terms where pG(d)=0 are skipped)
        if (p > 0.001) or (pG > 0.001):
            plist += [(d,p,pG)]  # only interested in reporting non-negligible probabilities
    # Sort the list of (d, p(d), pG(d)) by p(d) descending and print.
    plist.sort( key=lambda x: x[1], reverse=True )
    print( "The world vs. the Helmholtz machine's current model of the world:" )
    for (d,p,pG) in plist:
        print( patternToString(d,3), " %5.3f" % p, " %5.3f" % pG )
    print( "KL(world, machine) =", klSum )

# Demonstrate how a small vanilla Helmholtz machine can learn to model vhWorld.
# It is sensitive to randomness in sampling, but most of the time it can learn the
# world nicely in under 80,000 cycles.
hm= HelmholtzMachine( 1, 4, 9 )
hm.learn( vhWorld, 80000 )
reportModelComparison( vhWorld, hm )
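
# An optional check, not part of the original demo: for a machine this small, the
# generative distribution pG(d) can be computed exactly by marginalizing over all
# 2^nTop * 2^nMid hidden configurations, rather than estimated by sampling as in
# estimateModel() above. This is a minimal sketch; the helper names exactModel and
# bernoulliProb are my own additions, not from the tutorial.

import itertools

def bernoulliProb( bits, probs ):
    """ Probability of binary vector bits under independent Bernoulli probs. """
    bits= np.array( bits )
    return np.prod( np.where( bits > 0, probs, 1 - probs ) )

def exactModel( machine ):
    """ Map every pattern d to its exact generative probability pG(d). """
    nTop= len( machine.bG )
    nMid= machine.wG.shape[0]
    dim= machine.dimension
    probG= { intToPattern(i,dim) : 0.0 for i in range( 2**dim ) }
    # Sum p(x) p(y|x) p(d|y) over every hidden configuration (x, y).
    for x in itertools.product( (0.0,1.0), repeat=nTop ):
        px= bernoulliProb( x, sigmoid( machine.bG ) )
        for y in itertools.product( (0.0,1.0), repeat=nMid ):
            py= bernoulliProb( y, sigmoid( np.dot(machine.wG, list(x)+[1]) ) )
            pdVec= sigmoid( np.dot(machine.vG, list(y)+[1]) )
            for d in probG:
                probG[ d ] += px * py * bernoulliProb( d, pdVec )
    return probG

# For example, probG= exactModel( hm ) gives exact values to compare against the
# sampled estimates printed by reportModelComparison above.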