Skip to content

Commit 053a58b

Browse files
aryan26roy and adam2392 authored
[ENH] Add the ability to find Proper Possibly Directed Paths (#112)
* Added function to return a list of possibly directed paths between two nodes --------- Signed-off-by: Aryan Roy <aryanroy5678@gmail.com> Co-authored-by: Adam Li <adam2392@gmail.com>
1 parent 68de868 commit 053a58b

File tree

3 files changed

+367
-1
lines changed

3 files changed

+367
-1
lines changed

doc/whats_new/v0.2.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ Changelog
3333
- |Feature| Implement functions for converting between a DAG and PDAG and CPDAG for generating consistent extensions of a CPDAG for example. These functions are :func:`pywhy_graphs.algorithms.pdag_to_cpdag`, :func:`pywhy_graphs.algorithms.pdag_to_dag` and :func:`pywhy_graphs.algorithms.dag_to_cpdag`, by `Adam Li`_ (:pr:`102`)
3434
- |API| Remove poetry based setup, by `Adam Li`_ (:pr:`110`)
3535
- |Feature| Implement and test function to validate PAG, by `Aryan Roy`_ (:pr:`100`)
36+
- |Feature| Implement and test function to find all the proper possibly directed paths, by `Aryan Roy`_ (:pr:`112`)
3637

3738
Code and Documentation Contributors
3839
-----------------------------------

pywhy_graphs/algorithms/generic.py

Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
"dag_to_mag",
2020
"is_maximal",
2121
"all_vstructures",
22+
"proper_possibly_directed_path",
2223
]
2324

2425

@@ -855,3 +856,188 @@ def all_vstructures(G: nx.DiGraph, as_edges: bool = False):
855856
else:
856857
vstructs.add((p1, node, p2)) # type: ignore
857858
return vstructs
859+
860+
861+
def _check_back_arrow(G: ADMG, X, Y: set):
862+
"""Retrieve all the neigbors of X that do not have
863+
an arrow pointing back to it.
864+
865+
Parameters
866+
----------
867+
G : DiGraph
868+
A directed graph.
869+
X : Node
870+
Y : Set
871+
A set of neigbors of X.
872+
873+
Returns
874+
-------
875+
out : set
876+
A set of all the neighbors of X that do not have an arrow pointing
877+
back to it.
878+
"""
879+
out = set()
880+
881+
for elem in Y:
882+
if not (
883+
G.has_edge(X, elem, G.bidirected_edge_name) or G.has_edge(elem, X, G.directed_edge_name)
884+
):
885+
out.update(elem)
886+
887+
return out
888+
889+
890+
def _get_neighbors_of_set(G, X: set):
    """Collect the first hop of every candidate path that leaves the set X.

    For each node in ``X`` the neighbors reported by ``G.neighbors`` are
    filtered with ``_check_back_arrow`` and any neighbor that itself belongs
    to ``X`` is discarded.

    Parameters
    ----------
    G : DiGraph
        A graph exposing ``neighbors(node)`` and whatever
        ``_check_back_arrow`` requires.
    X : Set
        The set of source nodes.

    Returns
    -------
    out : set
        A set of 2-tuples ``(source, neighbor)``, one for every retained
        neighbor of every source in ``X``.
    """
    first_hops = set()

    for source in X:
        candidates = _check_back_arrow(G, source, set(G.neighbors(source)))
        # Neighbors that are themselves sources are dropped here.
        outside = candidates - X.intersection(candidates)
        first_hops.update((source, nbr) for nbr in outside)

    return first_hops
921+
922+
923+
def _recursively_find_pd_paths(G, X, paths, Y):
    """Recursively extend partial paths until they reach a node in Y.

    Parameters
    ----------
    G : DiGraph
        A graph exposing ``neighbors(node)`` and whatever
        ``_check_back_arrow`` requires.
    X : Set
        Source nodes. A path is never extended through a node of ``X``.
    paths : Set
        Partial paths to extend, each a tuple of nodes whose last entry is
        the node to continue from.
    Y : Set
        Destination nodes. A path stops as soon as it reaches one of them.

    Returns
    -------
    out : set
        A set of tuples, each a path ending in a node of ``Y``. Partial
        paths that cannot reach ``Y`` are omitted.
    """
    new_paths = set()

    for path in paths:
        cur_elem = path[-1]

        # The path already terminates in Y: it is complete as is.
        if cur_elem in Y:
            new_paths.add(path)
            continue

        nbr_possible = _check_back_arrow(G, cur_elem, G.neighbors(cur_elem))

        # Dead end: the path cannot be extended and never reached Y, so it
        # contributes nothing to the result.
        if not nbr_possible:
            continue

        # Neighbors lying in Y close the path immediately.
        possible_end = nbr_possible.intersection(Y)
        new_paths.update(path + (nbr,) for nbr in possible_end)

        # Keep walking through the other neighbors, skipping nodes already
        # on this path (no revisits) and nodes of X (only the first node of
        # a path may belong to X).
        remaining_nodes = nbr_possible.difference(possible_end, path, X)
        extended = {path + (nbr,) for nbr in remaining_nodes}

        new_paths.update(_recursively_find_pd_paths(G, X, extended, Y))

    return new_paths
984+
985+
986+
def proper_possibly_directed_path(G, X: Optional[Set], Y: Optional[Set]):
    """Find all the proper possibly directed paths from X to Y in a graph.

    A proper possibly directed path from X to Y is a path in which only the
    first node belongs to X and none of the edges has an arrow pointing back
    towards X.

    Parameters
    ----------
    G : DiGraph
        A graph exposing ``neighbors(node)``, ``has_edge(u, v, edge_type)``
        and the ``directed_edge_name`` / ``bidirected_edge_name`` attributes.
    X : Set
        Source. A single node that is not wrapped in a set is also accepted
        and treated as a one-element set.
    Y : Set
        Destination.

    Returns
    -------
    out : set
        A set of tuples, each listing the nodes of one proper possibly
        directed path from a node in X to a node in Y.

    Examples
    --------
    The function generates a set of tuples containing all the valid
    proper possibly directed paths from X to Y.

    >>> import pywhy_graphs
    >>> from pywhy_graphs import PAG
    >>> pag = PAG()
    >>> pag.add_edge("A", "G", pag.directed_edge_name)
    >>> pag.add_edge("G", "C", pag.directed_edge_name)
    >>> pag.add_edge("C", "H", pag.directed_edge_name)
    >>> pag.add_edge("Z", "C", pag.circle_edge_name)
    >>> pag.add_edge("C", "Z", pag.circle_edge_name)
    >>> pag.add_edge("Y", "X", pag.directed_edge_name)
    >>> pag.add_edge("X", "Z", pag.directed_edge_name)
    >>> pag.add_edge("Z", "K", pag.directed_edge_name)
    >>> Y = {"H", "K"}
    >>> X = {"Y", "A"}
    >>> sorted(pywhy_graphs.proper_possibly_directed_path(pag, X, Y))
    [('A', 'G', 'C', 'H'), ('A', 'G', 'C', 'Z', 'K'), ('Y', 'X', 'Z', 'C', 'H'), ('Y', 'X', 'Z', 'K')]

    """
    # A lone source node goes through the same code path as a set of
    # sources, so the helpers always receive a set of nodes and a set of
    # tuple-shaped partial paths.
    sources = X if isinstance(X, (set, frozenset)) else {X}

    first_hops = _get_neighbors_of_set(G, sources)

    return _recursively_find_pd_paths(G, sources, first_hops, Y)

pywhy_graphs/algorithms/tests/test_generic.py

Lines changed: 180 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import pytest
33

44
import pywhy_graphs
5-
from pywhy_graphs import ADMG
5+
from pywhy_graphs import ADMG, PAG
66
from pywhy_graphs.algorithms import all_vstructures
77

88

@@ -496,3 +496,182 @@ def test_all_vstructures():
496496
# Assert that the returned values are as expected
497497
assert len(v_structs_edges) == 0
498498
assert len(v_structs_tuples) == 0
499+
500+
501+
def test_proper_possibly_directed():
    """Check ``proper_possibly_directed_path`` on a series of small ADMGs.

    Every scenario lists the edges to add (in insertion order), the source
    set, the destination set and the exact set of expected paths.
    """
    D = "directed_edge_name"
    B = "bidirected_edge_name"

    scenarios = [
        # Single chain Y -> X -> Z -> H.
        (
            [("Y", "X", D), ("X", "Z", D), ("Z", "H", D)],
            {"Y"},
            {"H"},
            {("Y", "X", "Z", "H")},
        ),
        # Two sources feeding the same chain.
        (
            [("A", "X", D), ("Y", "X", D), ("X", "Z", D), ("Z", "H", D)],
            {"Y", "A"},
            {"H"},
            {("Y", "X", "Z", "H"), ("A", "X", "Z", "H")},
        ),
        # The edge X -> A points into the source A, so A starts no path.
        (
            [("X", "A", D), ("Y", "X", D), ("X", "Z", D), ("Z", "H", D)],
            {"Y", "A"},
            {"H"},
            {("Y", "X", "Z", "H")},
        ),
        # K is a destination but K -> Z points away from it.
        (
            [
                ("X", "A", D),
                ("Y", "X", D),
                ("X", "Z", D),
                ("Z", "H", D),
                ("K", "Z", D),
            ],
            {"Y", "A"},
            {"H", "K"},
            {("Y", "X", "Z", "H")},
        ),
        # Two sources and two destinations on a shared chain.
        (
            [
                ("A", "X", D),
                ("Y", "X", D),
                ("X", "Z", D),
                ("Z", "H", D),
                ("Z", "K", D),
            ],
            {"Y", "A"},
            {"H", "K"},
            {
                ("Y", "X", "Z", "K"),
                ("A", "X", "Z", "K"),
                ("Y", "X", "Z", "H"),
                ("A", "X", "Z", "H"),
            },
        ),
        # Two disjoint chains.
        (
            [
                ("A", "G", D),
                ("G", "C", D),
                ("C", "H", D),
                ("Y", "X", D),
                ("X", "Z", D),
                ("Z", "K", D),
            ],
            {"Y", "A"},
            {"H", "K"},
            {("Y", "X", "Z", "K"), ("A", "G", "C", "H")},
        ),
        # The chains are joined by the directed edge Z -> C.
        (
            [
                ("A", "G", D),
                ("G", "C", D),
                ("C", "H", D),
                ("Z", "C", D),
                ("Y", "X", D),
                ("X", "Z", D),
                ("Z", "K", D),
            ],
            {"Y", "A"},
            {"H", "K"},
            {("Y", "X", "Z", "K"), ("Y", "X", "Z", "C", "H"), ("A", "G", "C", "H")},
        ),
        # Every source is directly connected to every destination.
        (
            [("A", "G", D), ("A", "H", D), ("K", "G", D), ("K", "H", D)],
            {"A", "K"},
            {"G", "H"},
            {("K", "H"), ("K", "G"), ("A", "G"), ("A", "H")},
        ),
        # The chains are joined by a bidirected edge, which cannot be crossed.
        (
            [
                ("A", "G", D),
                ("G", "C", D),
                ("C", "H", D),
                ("Z", "C", B),
                ("Y", "X", D),
                ("X", "Z", D),
                ("Z", "K", D),
            ],
            {"Y", "A"},
            {"H", "K"},
            {
                ("A", "G", "C", "H"),
                ("Y", "X", "Z", "K"),
            },
        ),
        # Same graph and expectation as the previous scenario.
        (
            [
                ("A", "G", D),
                ("G", "C", D),
                ("C", "H", D),
                ("Z", "C", B),
                ("Y", "X", D),
                ("X", "Z", D),
                ("Z", "K", D),
            ],
            {"Y", "A"},
            {"H", "K"},
            {("Y", "X", "Z", "K"), ("A", "G", "C", "H")},
        ),
    ]

    for edges, sources, targets, correct in scenarios:
        admg = ADMG()
        for u, v, kind in edges:
            # The edge type name is read from the graph instance itself.
            admg.add_edge(u, v, getattr(admg, kind))

        out = pywhy_graphs.proper_possibly_directed_path(admg, sources, targets)
        assert correct == out
653+
654+
655+
def test_ppdp_PAG():
    """Check ``proper_possibly_directed_path`` on a PAG with circle edges.

    The circle edges added in both directions between Z and C let paths
    cross between the two directed chains.
    """
    pag = PAG()

    directed = pag.directed_edge_name
    edges = [
        ("A", "G", directed),
        ("G", "C", directed),
        ("C", "H", directed),
        ("Z", "C", pag.circle_edge_name),
        ("C", "Z", pag.circle_edge_name),
        ("Y", "X", directed),
        ("X", "Z", directed),
        ("Z", "K", directed),
    ]
    for u, v, edge_type in edges:
        pag.add_edge(u, v, edge_type)

    targets = {"H", "K"}
    sources = {"Y", "A"}

    expected = {
        ("Y", "X", "Z", "K"),
        ("Y", "X", "Z", "C", "H"),
        ("A", "G", "C", "H"),
        ("A", "G", "C", "Z", "K"),
    }
    found = pywhy_graphs.proper_possibly_directed_path(pag, sources, targets)
    assert expected == found

0 commit comments

Comments
 (0)