# Kullback-Leibler divergence for Bayesian networks

In [1]:

%matplotlib inline
import matplotlib.pyplot as plt


## Initialisation

- importing pyAgrum
- importing pyAgrum.lib tools

In [2]:

import pyAgrum as gum
import pyAgrum.lib.notebook as gnb


## Create a first BN: bn

In [3]:

bn=gum.loadBN("res/asia.bif")
# randomly re-generate parameters for every Conditional Probability Table
bn.generateCPTs()
bn

Out[3]:

*(graphical rendering of bn)*

## Create a second BN: bn2

In [4]:

bn2=gum.loadBN("res/asia.bif")
bn2.generateCPTs()
bn2

Out[4]:

*(graphical rendering of bn2)*

## bn vs bn2: different parameters

In [5]:

gnb.flow.row(bn.cpt(3),bn2.cpt(3),
             captions=["a CPT in bn","same CPT in bn2 (with different parameters)"])

| tuberculos_or_cancer | positive_XraY=0 | positive_XraY=1 |
|---|---|---|
| 0 | 0.9842 | 0.0158 |
| 1 | 0.1495 | 0.8505 |

a CPT in bn

| tuberculos_or_cancer | positive_XraY=0 | positive_XraY=1 |
|---|---|---|
| 0 | 0.9960 | 0.0040 |
| 1 | 0.5246 | 0.4754 |

same CPT in bn2 (with different parameters)
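
Node id 3 corresponds to `positive_XraY` here (as the captions above show). A small variant, assuming that `cpt()` also accepts a variable name instead of a node id (true in recent pyAgrum versions, but an assumption here):

```python
# assumption: cpt() accepts a variable name as well as a node id
gnb.flow.row(bn.cpt("positive_XraY"), bn2.cpt("positive_XraY"),
             captions=["a CPT in bn", "same CPT in bn2 (with different parameters)"])
```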

## Exact and (Gibbs-)approximated KL divergence

In order to compute a KL divergence, the two distributions only need to be defined on the same domain (the same variables, with the same domains).
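
As a reminder, for two joint distributions $P$ and $Q$ over the same domain, the Kullback-Leibler divergence is

$$D_{\mathrm{KL}}(P \| Q) = \sum_{x} P(x)\,\log\frac{P(x)}{Q(x)}$$

It is not symmetric, which is why the results below report both `klPQ` and `klQP`.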

### Exact KL

In [6]:

g1=gum.ExactBNdistance(bn,bn2)
print(g1.compute())

{'klPQ': 5.0030971388003485, 'errorPQ': 0, 'klQP': 3.527314407069579, 'errorQP': 0, 'hellinger': 1.037499761443752, 'bhattacharya': 0.7726296131290553, 'jensen-shannon': 0.6385791723188207}
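
`compute()` returns a plain Python dictionary, so each measure can be read by key (the keys are those shown in the output above):

```python
res = g1.compute()
print(f"KL(bn||bn2)    = {res['klPQ']:.4f}")
print(f"KL(bn2||bn)    = {res['klQP']:.4f}")
print(f"Jensen-Shannon = {res['jensen-shannon']:.4f}")
```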


If the models are not defined on the same domain:

In [7]:

bn_different_domain=gum.loadBN("res/alarm.dsl")

# g=gum.ExactBNdistance(bn,bn_different_domain) # a KL divergence between asia and alarm ... :(
#
# would raise:
#---------------------------------------------------------------------------
#OperationNotAllowed                       Traceback (most recent call last)
#
#OperationNotAllowed: this operation is not allowed : KL : the 2 BNs are not compatible (not the same vars : visit_to_Asia?)
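
For robustness, the failure can be caught explicitly; a minimal sketch, assuming the exception is exposed as `gum.OperationNotAllowed` (as the traceback above suggests):

```python
try:
    gum.ExactBNdistance(bn, bn_different_domain)
except gum.OperationNotAllowed as e:
    # the two BNs are not defined on the same variables
    print(f"incompatible models: {e}")
```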


### Gibbs-approximated KL
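
`GibbsBNdistance` estimates the same measures by sampling instead of exact computation. Informally (a sketch of the idea, not necessarily pyAgrum's exact internals): drawing samples $x^{(1)},\dots,x^{(N)}$ from $P$ with a Gibbs sampler yields the Monte Carlo estimate

$$D_{\mathrm{KL}}(P \| Q) \approx \frac{1}{N}\sum_{i=1}^{N} \log\frac{P(x^{(i)})}{Q(x^{(i)})}$$

and symmetrically for `klQP`.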

In [8]:

g=gum.GibbsBNdistance(bn,bn2)
g.setVerbosity(True)    # record the history of the run (needed for history() below)
g.setMaxTime(120)       # stop after at most 120 seconds
g.setBurnIn(5000)       # number of burn-in iterations before the estimation starts
g.setEpsilon(1e-7)      # stop when the estimate evolves by less than 1e-7
g.setPeriodSize(500)    # check the stopping criteria every 500 iterations

In [9]:

print(g.compute())
print("Computed in {0} s".format(g.currentTime()))

{'klPQ': 4.999350547660979, 'errorPQ': 0, 'klQP': 3.309147414499417, 'errorQP': 0, 'hellinger': 1.0241904644545237, 'bhattacharya': 0.7786179301380848, 'jensen-shannon': 0.6228385613041574}
Computed in 0.955262833 s
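
Since exact values are available from `g1` above, the quality of the approximation can be checked key by key (a small sketch using the keys shown in both outputs; note that calling `compute()` again re-runs the estimation):

```python
exact = g1.compute()
approx = g.compute()  # re-runs the Gibbs estimation
for key in ("klPQ", "klQP", "hellinger", "bhattacharya", "jensen-shannon"):
    rel = abs(approx[key] - exact[key]) / exact[key]
    print(f"{key:15s} exact={exact[key]:.4f}  approx={approx[key]:.4f}  (rel. err. {rel:.1%})")
```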

In [10]:

print("--")

print(g.messageApproximationScheme())
print("--")

print("Temps de calcul : {0}".format(g.currentTime()))
print("Nombre d'itérations : {0}".format(g.nbrIterations()))

--
stopped with epsilon=1e-07
--
Temps de calcul : 0.955262833
Nombre d'itérations : 176500

In [11]:

p=plt.plot(g.history(), 'g')  # history recorded during the run (requires setVerbosity(True))


### Animation of Gibbs KL

Since it may be difficult to see what happens during an approximation algorithm, pyAgrum allows one to follow the iterations using an animated matplotlib figure.

In [12]:

g=gum.GibbsBNdistance(bn,bn2)
g.setMaxTime(60)
g.setBurnIn(500)
g.setEpsilon(1e-7)
g.setPeriodSize(5000)

In [13]:

gnb.animApproximationScheme(g) # logarithmic scale for Y
g.compute()

Out[13]:

{'klPQ': 5.016855829234866,
'errorPQ': 0,
'klQP': 3.3839420489270537,
'errorQP': 0,
'hellinger': 1.0291151959143872,
'bhattacharya': 0.7805282966474169,
'jensen-shannon': 0.6282516385879086}
