Learning essential graphs

Creative Commons License

aGrUM

interactive online version

In [1]:
%matplotlib inline
from pylab import *
import matplotlib.pyplot as plt

import os

import pyAgrum as gum
import pyAgrum.lib.notebook as gnb


Compare learning algorithms

MIIC and 3off2 essentially compute the essential graph (CPDAG) from data. Essential graphs are mixed graphs, i.e. they may contain both directed arcs and undirected edges.

In [2]:
# Learn an essential graph directly from out/sample_asia.csv with the
# 3off2 algorithm and the NML correction.
learner = gum.BNLearner("out/sample_asia.csv")
learner.use3off2()
learner.useNMLCorrection()
print(learner)  # text summary of the learner's current configuration
ge3off2 = learner.learnEssentialGraph()
Filename       : out/sample_asia.csv
Size           : (500000,8)
Variables      : smoking[2], visit_to_Asia[2], lung_cancer[2], bronchitis[2], tuberculos_or_cancer[2], dyspnoea[2], positive_XraY[2], tuberculosis[2]
Induced types  : True
Missing values : False
Algorithm      : 3off2
Correction     : NML
Prior          : -

In [3]:
# Render the essential graph learned by 3off2 from its DOT description.
dot_3off2 = ge3off2.toDot()
gnb.showDot(dot_3off2);
../_images/notebooks_33-Learning_LearningAndEssentialGraphs_5_0.svg
In [4]:
# Learn an essential graph from the same CSV file, this time with MIIC
# (still using the NML correction).
learner = gum.BNLearner("out/sample_asia.csv")
learner.useMIIC()
learner.useNMLCorrection()
print(learner)  # text summary of the learner's current configuration
gemiic = learner.learnEssentialGraph()
gemiic  # last expression of the cell: rendered as the cell's output
Filename       : out/sample_asia.csv
Size           : (500000,8)
Variables      : smoking[2], visit_to_Asia[2], lung_cancer[2], bronchitis[2], tuberculos_or_cancer[2], dyspnoea[2], positive_XraY[2], tuberculosis[2]
Induced types  : True
Missing values : False
Algorithm      : MIIC
Correction     : NML
Prior          : -

Out[4]:
no_name 0 smoking 2 lung_cancer 0->2 3 bronchitis 0->3 1 visit_to_Asia 7 tuberculosis 1->7 4 tuberculos_or_cancer 2->4 5 dyspnoea 3->5 4->5 6 positive_XraY 4->6 7->4

For the other methods, it is possible to obtain the essential graph from the learned BN.

In [5]:
# Learn a BN with Greedy Hill Climbing, then derive its essential graph
# from the learned network.
learner = gum.BNLearner("out/sample_asia.csv")
learner.useGreedyHillClimbing()
bnHC = learner.learnBN()
print(learner)  # text summary of the learner's configuration
geHC = gum.EssentialGraph(bnHC)
# Show the learned BN next to its essential graph. (A bare `geHC` line used
# to sit here; as a non-final expression it displayed nothing, so it was
# removed.)
gnb.sideBySide(bnHC, geHC)
Filename       : out/sample_asia.csv
Size           : (500000,8)
Variables      : smoking[2], visit_to_Asia[2], lung_cancer[2], bronchitis[2], tuberculos_or_cancer[2], dyspnoea[2], positive_XraY[2], tuberculosis[2]
Induced types  : True
Missing values : False
Algorithm      : Greedy Hill Climbing
Score          : BDeu
Prior          : -

G positive_XraY positive_XraY tuberculos_or_cancer tuberculos_or_cancer positive_XraY->tuberculos_or_cancer tuberculosis tuberculosis visit_to_Asia visit_to_Asia tuberculosis->visit_to_Asia lung_cancer lung_cancer tuberculosis->lung_cancer tuberculos_or_cancer->tuberculosis dyspnoea dyspnoea tuberculos_or_cancer->dyspnoea tuberculos_or_cancer->lung_cancer bronchitis bronchitis tuberculos_or_cancer->bronchitis dyspnoea->bronchitis smoking smoking lung_cancer->smoking bronchitis->tuberculosis bronchitis->lung_cancer bronchitis->smoking
no_name 0 smoking 2 lung_cancer 0->2 3 bronchitis 0->3 1 visit_to_Asia 7 tuberculosis 1->7 2->3 4 tuberculos_or_cancer 2->4 2->7 3->4 5 dyspnoea 3->5 3->7 4->5 6 positive_XraY 4->6 4->7
In [6]:
# Learn a BN with local search + tabu list, then derive its essential graph
# from the learned network.
learner = gum.BNLearner("out/sample_asia.csv")
learner.useLocalSearchWithTabuList()
print(learner)  # text summary of the learner's configuration
bnTL = learner.learnBN()
geTL = gum.EssentialGraph(bnTL)
# Show the learned BN next to its essential graph. (A bare `geTL` line used
# to sit here; as a non-final expression it displayed nothing, so it was
# removed.)
gnb.sideBySide(bnTL, geTL)
Filename       : out/sample_asia.csv
Size           : (500000,8)
Variables      : smoking[2], visit_to_Asia[2], lung_cancer[2], bronchitis[2], tuberculos_or_cancer[2], dyspnoea[2], positive_XraY[2], tuberculosis[2]
Induced types  : True
Missing values : False
Algorithm      : Local Search with Tabu List
Tabu list size : 2
Score          : BDeu
Prior          : -

G positive_XraY positive_XraY tuberculosis tuberculosis tuberculosis->positive_XraY visit_to_Asia visit_to_Asia tuberculosis->visit_to_Asia tuberculos_or_cancer tuberculos_or_cancer tuberculosis->tuberculos_or_cancer lung_cancer lung_cancer tuberculosis->lung_cancer tuberculos_or_cancer->positive_XraY dyspnoea dyspnoea tuberculos_or_cancer->dyspnoea tuberculos_or_cancer->lung_cancer bronchitis bronchitis tuberculos_or_cancer->bronchitis dyspnoea->bronchitis smoking smoking smoking->positive_XraY smoking->tuberculos_or_cancer smoking->dyspnoea smoking->lung_cancer smoking->bronchitis
no_name 0 smoking 2 lung_cancer 0->2 3 bronchitis 0->3 4 tuberculos_or_cancer 0->4 5 dyspnoea 0->5 6 positive_XraY 0->6 1 visit_to_Asia 7 tuberculosis 1->7 2->4 3->5 4->3 4->5 4->6 7->2 7->4 7->6

Hence we can compare the 4 algorithms.

In [7]:
# Graphs to compare, paired with their captions, in display order.
panels = [
    (ge3off2, "Essential graph from 3off2"),
    (gemiic, "Essential graph from miic"),
    (bnHC, "BayesNet from GHC"),
    (geHC, "Essential graph from GHC"),
    (bnTL, "BayesNet from TabuList"),
    (geTL, "Essential graph from TabuList"),
]

# Equivalent to the fluent chain clear().add(...).add(...).display():
# each add() is called on the value returned by the previous call.
flow = gnb.flow.clear()
for graph, caption in panels:
    flow = flow.add(graph, caption)
flow.display()
no_name 0 smoking 2 lung_cancer 0->2 3 bronchitis 0->3 1 visit_to_Asia 7 tuberculosis 1->7 4 tuberculos_or_cancer 2->4 5 dyspnoea 3->5 4->5 6 positive_XraY 4->6 7->4
Essential graph from 3off2
no_name 0 smoking 2 lung_cancer 0->2 3 bronchitis 0->3 1 visit_to_Asia 7 tuberculosis 1->7 4 tuberculos_or_cancer 2->4 5 dyspnoea 3->5 4->5 6 positive_XraY 4->6 7->4
Essential graph from miic
G positive_XraY positive_XraY tuberculos_or_cancer tuberculos_or_cancer positive_XraY->tuberculos_or_cancer tuberculosis tuberculosis visit_to_Asia visit_to_Asia tuberculosis->visit_to_Asia lung_cancer lung_cancer tuberculosis->lung_cancer tuberculos_or_cancer->tuberculosis dyspnoea dyspnoea tuberculos_or_cancer->dyspnoea tuberculos_or_cancer->lung_cancer bronchitis bronchitis tuberculos_or_cancer->bronchitis dyspnoea->bronchitis smoking smoking lung_cancer->smoking bronchitis->tuberculosis bronchitis->lung_cancer bronchitis->smoking
BayesNet from GHC
no_name 0 smoking 2 lung_cancer 0->2 3 bronchitis 0->3 1 visit_to_Asia 7 tuberculosis 1->7 2->3 4 tuberculos_or_cancer 2->4 2->7 3->4 5 dyspnoea 3->5 3->7 4->5 6 positive_XraY 4->6 4->7
Essential graph from GHC
G positive_XraY positive_XraY tuberculosis tuberculosis tuberculosis->positive_XraY visit_to_Asia visit_to_Asia tuberculosis->visit_to_Asia tuberculos_or_cancer tuberculos_or_cancer tuberculosis->tuberculos_or_cancer lung_cancer lung_cancer tuberculosis->lung_cancer tuberculos_or_cancer->positive_XraY dyspnoea dyspnoea tuberculos_or_cancer->dyspnoea tuberculos_or_cancer->lung_cancer bronchitis bronchitis tuberculos_or_cancer->bronchitis dyspnoea->bronchitis smoking smoking smoking->positive_XraY smoking->tuberculos_or_cancer smoking->dyspnoea smoking->lung_cancer smoking->bronchitis
BayesNet from TabuList
no_name 0 smoking 2 lung_cancer 0->2 3 bronchitis 0->3 4 tuberculos_or_cancer 0->4 5 dyspnoea 0->5 6 positive_XraY 0->6 1 visit_to_Asia 7 tuberculosis 1->7 2->4 3->5 4->3 4->5 4->6 7->2 7->4 7->6
Essential graph from TabuList
In [ ]: