-
Notifications
You must be signed in to change notification settings - Fork 15
WIP: deep narrow paths mutation #10
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 5 commits
8fbd1f1
db78db4
07d1f39
0837c10
f67c730
c908caf
adbc215
20e5b34
22a786e
91cbde0
9c3238a
6362dc8
1130da4
12a95ae
22ca6aa
126e84d
331e06f
49b5c4d
c0617ea
82cdacf
9117d05
d792d10
75bd1ea
72f2fee
e2e09a4
6deb0ad
05ae843
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,3 +7,6 @@ venv/ | |
| # ignore py compiled etc. files | ||
| *.pyc | ||
| *.pyo | ||
|
|
||
| # ignore .idea | ||
| .idea/ | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -89,6 +89,9 @@ | |
| MUTPB_FV_SAMPLE_MAXN = 32 # max n of instantiations to sample from top k | ||
| MUTPB_FV_QUERY_LIMIT = 256 # SPARQL query limit for the top k instantiations | ||
| MUTPB_SP = 0.05 # prob to simplify pattern (warning: can restrict exploration) | ||
| MUTPB_DN = 0.5 # prob to try adding a deep and narrow path to a pattern | ||
| MUTPB_DN_PS_MAX_N = 10 # Max steps in the deep narrow path | ||
|
||
| MUTPB_DN_AVG_LIMIT = 10 # Max avg. reachable Nodes | ||
|
||
|
|
||
| # fusion of target candidates: | ||
| FUSION_SAMPLES_PER_CLASS = 500 # only use up to n training samples per class | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -54,6 +54,8 @@ | |
| from gp_query import query_stats | ||
| from gp_query import query_time_hard_exceeded | ||
| from gp_query import query_time_soft_exceeded | ||
| from gp_query import useful_path_query | ||
| from gp_query import useful_path_inst_query | ||
| from gp_query import variable_substitution_query | ||
| from graph_pattern import canonicalize | ||
| from graph_pattern import gen_random_var | ||
|
|
@@ -684,6 +686,121 @@ def mutate_fix_var( | |
| ] | ||
| return res | ||
|
|
||
def mutate_deep_narrow(
        sparql,
        timeout,
        child,
        gtp_scores,
        dn_path_steps_max_n=config.MUTPB_DN_PS_MAX_N,
        direct=None,
        childin=False,
        limit=config.MUTPB_FV_QUERY_LIMIT,  # TODO: use the limit?
):
    """Try to extend child by an instantiated path from source to target.

    A chain ?source -p0- ?n0 -p1- ... -pn- ?target of n + 1 hops is set up.
    Candidate predicates for the hops are collected with useful_path_query,
    walking inwards from the source side and from the target side. Up to 10
    randomly sampled candidates per hop are kept as value blocks, which are
    then handed to useful_path_inst_query to get full instantiations of all
    hops.

    NOTE(review): the call visible in mutate() passes
    (sparql, timeout, gtp_scores, child), which does not match this
    signature's (sparql, timeout, child, gtp_scores) order - one of the two
    needs to be adapted.

    :param sparql: passed through to evaluate and the query functions.
    :param timeout: passed through to evaluate and the query functions.
    :param child: the graph pattern to extend. If it has no
        matching_node_pairs yet, it is evaluated first.
    :param gtp_scores: passed to evaluate if child needs to be evaluated.
    :param dn_path_steps_max_n: upper bound for the randomly drawn number of
        intermediate nodes (currently overridden, see below).
    :param direct: optional sequence of n + 1 hop directions: 1 is a hop
        from node i to node i + 1, 0 means choose randomly, any other value
        reverses the hop. If None or of wrong length all directions are
        chosen randomly. The passed sequence is not modified.
    :param childin: passed as gp_in to the query functions.
    :param limit: currently unused.
    :return: list of graph patterns, one per found instantiation (child
        plus the instantiated path), or [child] if child has no matching
        node pairs or any of the queries returns nothing.
    """
    if not child.matching_node_pairs:
        # TODO: do run / gen have to be passed in here?
        ev = evaluate(sparql, timeout, gtp_scores, child)
        update_individuals([child], [ev])
    if not child.matching_node_pairs:
        return [child]

    # TODO: test which distribution of path lengths works well
    n = random.choice(range(dn_path_steps_max_n)) + 1
    # NOTE(review): the drawn length is overridden, so dn_path_steps_max_n
    # currently has no effect. Looks like a WIP leftover - confirm.
    n = 2

    # nodes on the path: ?source, ?n0 .. ?n<n-1>, ?target; hop[i] is the
    # predicate variable between node[i] and node[i + 1]
    node = (
        [SOURCE_VAR] + [Variable('n%i' % i) for i in range(n)] + [TARGET_VAR]
    )
    hop = [Variable('p%i' % i) for i in range(n + 1)]

    # TODO: remove if direct should simply always be chosen randomly
    if direct is None or len(direct) != n + 1:
        logger.debug(
            'No direction chosen, or direction tuple with false length'
        )
        direct = [0] * (n + 1)
    else:
        # work on a copy: undecided directions (0) are filled in below, which
        # must neither alter the caller's sequence nor fail on a tuple
        direct = list(direct)

    # one single-triple pattern per hop, oriented according to direct
    gp_helper = []
    for i in range(n + 1):
        if direct[i] == 0:
            direct[i] = random.choice([-1, 1])
        if direct[i] == 1:
            triple = (node[i], hop[i], node[i + 1])
        else:
            triple = (node[i + 1], hop[i], node[i])
        gp_helper.append(GraphPattern([triple]))

    # queries for the single steps
    sample_max_n = 10  # max candidates kept per hop
    half = n // 2
    valueblocks_s = {}
    valueblocks_t = {}
    for i in range(half + 1):
        if i < half:
            # source side: fix hop i (counted from the source) using the
            # first i + 1 steps and the hops already fixed on this side
            _, q_res = useful_path_query(
                sparql,
                timeout,
                child,
                hop[i],
                node[i + 1],
                valueblocks_s,
                gp_helper[:i + 1],
                SOURCE_VAR,
                gp_in=childin,
            )
            if not q_res:
                return [child]
            valueblocks_s[hop[i]] = {
                (hop[i],): random.sample(
                    [(q_r,) for q_r in q_res],
                    min(sample_max_n, len(q_res))
                )
            }
        if n - i > i:
            # target side: fix hop n - i (counted from the target) as long as
            # it has not been passed by the source side
            _, q_res = useful_path_query(
                sparql,
                timeout,
                child,
                hop[n - i],
                node[n - i],
                valueblocks_t,
                gp_helper[n - i:],
                TARGET_VAR,
                gp_in=childin,
            )
            if not q_res:
                return [child]
            valueblocks_t[hop[n - i]] = {
                (hop[n - i],): random.sample(
                    [(q_r,) for q_r in q_res],
                    min(sample_max_n, len(q_res))
                )
            }

    # query for the result: the inst query uses a joint source / target
    # block, so that only "real" paths are found
    valueblocks = {}
    valueblocks.update(valueblocks_s)
    valueblocks.update(valueblocks_t)
    _, q_res = useful_path_inst_query(
        sparql,
        timeout,
        child,
        hop,
        valueblocks,
        gp_helper,
        gp_in=childin
    )
    if not q_res:
        return [child]

    res = []
    for inst in q_res:
        # inst[i] is the value found for hop[i]
        child_inst = GraphPattern([
            (node[i], inst[i], node[i + 1]) if direct[i] == 1
            else (node[i + 1], inst[i], node[i])
            for i in range(n + 1)
        ])
        res.append(GraphPattern(child + child_inst))
    return res
|
|
||
|
|
||
| def mutate_simplify_pattern(gp): | ||
| if len(gp) < 2: | ||
|
|
@@ -797,6 +914,7 @@ def mutate( | |
| pb_mv=config.MUTPB_MV, | ||
| pb_sp=config.MUTPB_SP, | ||
| pb_sv=config.MUTPB_SV, | ||
| pb_dn=config.MUTPB_DN, | ||
| ): | ||
| # mutate patterns: | ||
| # grow: select random identifier and convert them into a var (local) | ||
|
|
@@ -837,8 +955,14 @@ def mutate( | |
| else: | ||
| children = [child] | ||
|
|
||
|
|
||
| # TODO: deep & narrow paths mutation | ||
| helper = [] | ||
| for child in children: | ||
|
||
| if random.random() < pb_dn: | ||
| res = mutate_deep_narrow(sparql, timeout, gtp_scores, child) | ||
| helper += res | ||
| else: | ||
| helper.append(child) | ||
| children = helper | ||
|
|
||
| children = { | ||
| c if fit_to_live(c) else orig_child | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -62,6 +62,8 @@ def __init__(self): | |
| self.ask_multi_query_count = 0 | ||
| self.combined_ask_count_multi_query_count = 0 | ||
| self.variable_substitution_query_count = 0 | ||
| self.useful_path_query_count = 0 | ||
| self.useful_path_inst_query_count = 0 | ||
| self.predict_query_count = 0 | ||
| self.count_query_count = 0 | ||
|
|
||
|
|
@@ -695,6 +697,145 @@ def _var_subst_chunk_result_ext(q_res, _sel_var_and_vars, _, **kwds): | |
|
|
||
| def _var_subst_res_update(res, update, **_): | ||
| res += update | ||
|
|
||
|
|
||
def useful_path_query(
        sparql,
        timeout,
        graph_pattern,
        var_to_fix,
        var_to_count,
        valueblocks,
        steps,
        startvar,
        avglimit=config.MUTPB_DN_AVG_LIMIT,
        gp_in=False,
        batch_size=None
):
    """Run the useful path query for graph_pattern via _multi_query.

    Increments the useful_path_query_count statistic, bundles all query
    arguments into one tuple for the _usef_path_* chunk callbacks and
    queries over the matching node pairs of graph_pattern.

    :return: whatever _multi_query returns.
    """
    _query_stats.useful_path_query_count += 1
    # TODO: maybe subtract 10 per 'fixed' variable from the batch size
    #  (as the value block is sent along with each query)
    stps = graph_pattern.matching_node_pairs
    # TODO: maybe intersect with the not yet covered pairs
    ret_val_mapping = {}
    for stp in stps:
        ret_val_mapping[stp] = [stp]
    query_args = (
        var_to_fix, var_to_count, startvar, valueblocks, steps, avglimit, gp_in
    )
    return _multi_query(
        sparql, timeout, graph_pattern, stps, batch_size, query_args, stps,
        ret_val_mapping,
        _usef_path_res_init,
        _usef_path_chunk_q,
        _usef_path_chunk_result_ext,
        _usef_path_res_update
    )
|
|
||
|
|
||
| # noinspection PyUnusedLocal | ||
| def _usef_path_res_init(_, **kwds): | ||
| return [] | ||
|
|
||
|
|
||
def _usef_path_chunk_q(gp, _vars_steps_and_stuff, values_chunk):
    """Build the useful path query for one chunk of node pairs.

    The start variable gets a value block with the first element of each
    pair in values_chunk if startvar is SOURCE_VAR, otherwise with the
    second element. The passed in value blocks are merged on top of it.
    """
    (var_to_fix, var_to_count, startvar, extra_blocks, steps, avglimit,
     gp_in) = _vars_steps_and_stuff
    idx = 0 if startvar == SOURCE_VAR else 1
    start_values = [(pair[idx],) for pair in values_chunk]
    blocks = {startvar: {(startvar,): start_values}}
    blocks.update(extra_blocks)
    return gp.to_sparql_useful_path_query(
        var_to_fix,
        var_to_count,
        blocks,
        steps,
        startvar,
        avglimit=avglimit,
        gp_in=gp_in
    )
|
|
||
|
|
||
# noinspection PyUnusedLocal
def _usef_path_chunk_result_ext(q_res, _vars_steps_and_stuff, _, **kwds):
    """Extract the value of var_to_fix from each result row of q_res."""
    (var_to_fix, _var_to_count, _startvar, _valueblocks, _steps, _avglimit,
     _gp_in) = _vars_steps_and_stuff
    rows = sparql_json_result_bindings_to_rdflib(
        get_path(q_res, ['results', 'bindings'], default=[])
    )
    # TODO: think about whether the avg outgoing counts should be returned
    #  as well
    return [get_path(row, [var_to_fix]) for row in rows]
|
|
||
|
|
||
| def _usef_path_res_update(res, update, **_): | ||
| res += update | ||
|
|
||
|
|
||
def useful_path_inst_query(
        sparql,
        timeout,
        graph_pattern,
        hop,
        valueblocks,
        steps,
        gp_in=False,
        batch_size=None
):
    """Run the useful path instantiation query via _multi_query.

    Increments the useful_path_inst_query_count statistic, bundles the
    query arguments into one tuple for the _usef_path_inst_* chunk callbacks
    and queries over the matching node pairs of graph_pattern.

    :return: whatever _multi_query returns.
    """
    _query_stats.useful_path_inst_query_count += 1
    # TODO: maybe subtract 10 per 'fixed' variable from the batch size
    #  (as the value block is sent along with each query)
    stps = graph_pattern.matching_node_pairs
    # maybe intersect with the not yet covered pairs
    ret_val_mapping = {}
    for stp in stps:
        ret_val_mapping[stp] = [stp]
    query_args = (hop, valueblocks, steps, gp_in)
    return _multi_query(
        sparql, timeout, graph_pattern, stps, batch_size, query_args, stps,
        ret_val_mapping,
        _usef_path_inst_res_init,
        _usef_path_inst_chunk_q,
        _usef_path_inst_chunk_result_ext,
        _usef_path_inst_res_update
    )
|
|
||
|
|
||
| # noinspection PyUnusedLocal | ||
| def _usef_path_inst_res_init(_, **kwds): | ||
| return [] | ||
|
|
||
|
|
||
def _usef_path_inst_chunk_q(gp, _vars_steps_and_stuff, values_chunk):
    """Build the useful path instantiation query for one chunk of pairs.

    The chunk is used as joint (SOURCE_VAR, TARGET_VAR) value block under
    the key 'st'. The passed in value blocks are merged on top of it.
    """
    hop, extra_blocks, steps, gp_in = _vars_steps_and_stuff
    blocks = {'st': {(SOURCE_VAR, TARGET_VAR): values_chunk}}
    blocks.update(extra_blocks)
    return gp.to_sparql_useful_path_inst_query(
        hop, blocks, steps, gp_in=gp_in
    )
|
|
||
|
|
||
# noinspection PyUnusedLocal
def _usef_path_inst_chunk_result_ext(q_res, _vars_steps_and_stuff, _, **kwds):
    """Extract the values of all hop variables from each result row.

    :return: list with one list per result row, holding the row's values
        for the variables in hop (in that order).
    """
    hop, _valueblocks, _steps, _gp_in = _vars_steps_and_stuff
    rows = sparql_json_result_bindings_to_rdflib(
        get_path(q_res, ['results', 'bindings'], default=[])
    )
    return [[get_path(row, [h]) for h in hop] for row in rows]
|
|
||
|
|
||
| def _usef_path_inst_res_update(res, update, **_): | ||
| res += update | ||
|
|
||
|
|
||
| def generate_stps_from_gp(sparql, gp): | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
probably should be a lot lower in final version