From 00369fa39055ad2bd5f9c70986acc41d643768dc Mon Sep 17 00:00:00 2001 From: Radonirinaunimi Date: Mon, 22 Mar 2021 21:15:24 +0100 Subject: [PATCH 1/5] Change default scale --- src/pycompressor/compressing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pycompressor/compressing.py b/src/pycompressor/compressing.py index 2fc33b8..7506ec1 100644 --- a/src/pycompressor/compressing.py +++ b/src/pycompressor/compressing.py @@ -22,7 +22,7 @@ log = logging.getLogger(__name__) # Initial scale (in GeV) -Q0 = 1 +Q0 = 1.65 # Total number of flavour to 2nf+1=7 NF = 4 From b99c8d0c984cdecd46eec2668bf816b0db8b48bd Mon Sep 17 00:00:00 2001 From: Radonirinaunimi Date: Tue, 23 Mar 2021 09:14:01 +0100 Subject: [PATCH 2/5] shuffle enhanced set & increase random selection --- src/pycompressor/compressing.py | 22 ++++++++++++++++++++-- src/pycompressor/errfunction.py | 2 +- src/pycompressor/utils.py | 21 +++++++++++++++++++++ 3 files changed, 42 insertions(+), 3 deletions(-) diff --git a/src/pycompressor/compressing.py b/src/pycompressor/compressing.py index 7506ec1..9ba475f 100644 --- a/src/pycompressor/compressing.py +++ b/src/pycompressor/compressing.py @@ -15,9 +15,11 @@ from pycompressor.pdfgrid import XGrid from pycompressor.pdfgrid import PdfSet from pycompressor.compressor import Compress +from pycompressor.utils import map_index from pycompressor.utils import extract_index from pycompressor.estimators import ALLOWED_ESTIMATORS + console = Console() log = logging.getLogger(__name__) @@ -121,7 +123,6 @@ def compressing(pdfsetting, compressed, minimizer, est_dic, gans): postgans(str(pdf), outfolder, nbgen) splash() - # Set seed rndgen = Generator(PCG64(seed=0)) console.print("\n• Load PDF sets & Printing Summary:", style="bold blue") @@ -135,14 +136,27 @@ def compressing(pdfsetting, compressed, minimizer, est_dic, gans): postgan = pdf + "_enhanced" final_result = {"pdfset_name": postgan} enhanced = PdfSet(postgan, xgrid, Q0, NF).build_pdf() + # Shuffled 
the enhanced PDF grid and save the shuffling + # index in order to restore it later. + shuffled_index = rndgen.choice( + enhanced.shape[0], + enhanced.shape[0], + replace=False + ) + assert enhanced.shape[0] == shuffled_index.shape[0] + enhanced = enhanced[shuffled_index] except RuntimeError as excp: raise LoadingEnhancedError(f"{excp}") nb_iter, ref_estimators = 100000, None - init_index = np.array(extract_index(pdf, compressed)) + extr_index = np.array(extract_index(pdf, compressed)) + init_index = map_index(shuffled_index, extr_index) + assert extr_index.shape[0] == init_index.shape[0] else: final_result = {"pdfset_name": pdf} nb_iter, ref_estimators = 15000, None init_index, enhanced = rndindex, prior + # reset seeds + rndgen = Generator(PCG64(seed=1)) # Create output folder outrslt = postgan if enhanced_already_exists else pdf @@ -196,6 +210,10 @@ def compressing(pdfsetting, compressed, minimizer, est_dic, gans): erf, index = comp.cma_algorithm(std_dev=0.8) else: raise ValueError(f"{minimizer} is not a valid minimizer.") + # Restore the shuffled index back in case of compression from + # an enhanced set + if enhanced_already_exists: + index = map_index(shuffled_index, index) # Prepare output file final_result["ERFs"] = erf_list diff --git a/src/pycompressor/errfunction.py b/src/pycompressor/errfunction.py index 7f6574d..d2a8fd6 100644 --- a/src/pycompressor/errfunction.py +++ b/src/pycompressor/errfunction.py @@ -276,7 +276,7 @@ class ErfComputation: Number of trials """ - def __init__(self, prior, est_dic, nreduc, folder, rndgen, trials=1000, norm=True): + def __init__(self, prior, est_dic, nreduc, folder, rndgen, trials=10000, norm=True): self.prior = prior self.est_dic = est_dic # Compute estimators for PRIOR replicas diff --git a/src/pycompressor/utils.py b/src/pycompressor/utils.py index 35ef677..ee84fef 100644 --- a/src/pycompressor/utils.py +++ b/src/pycompressor/utils.py @@ -17,6 +17,27 @@ def remap_index(index, shuffled): return np.array(new_idx) +def 
map_index(refarr, arr): + """Map the elements in `arr` to the index in which + they occur in `refarr`. + + Parameters + ---------- + arr: np.array(int) + one dimensional array of integers with size N + refarr: np.array(int) + one dimensional array of integers with size M + + Returns + ------- + np.array(int) + one dimensional array of integers with size N + """ + + inds = {e:i for i, e in enumerate(refarr)} + return np.vectorize(inds.get)(arr) + + def extract_estvalues(comp_size): """Extract the result from the prior for a given compressed set (w.r.t the size). From 496a7ed59d5eacb6fb75dcd6e130b76f22580ab4 Mon Sep 17 00:00:00 2001 From: Radonirinaunimi Date: Tue, 23 Mar 2021 09:14:38 +0100 Subject: [PATCH 3/5] Update runcard with last candidate --- runcards/runcard.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/runcards/runcard.yml b/runcards/runcard.yml index d4c9a0f..4f9105f 100644 --- a/runcards/runcard.yml +++ b/runcards/runcard.yml @@ -2,13 +2,13 @@ # PDF Set # ################################################### pdfsetting: - pdf: NNPDF40_nnlo_as_0118_1000 + pdf: 210219-02-rs-nnpdf40-1000 existing_enhanced: False ################################################### # Size of compressed PDF replicas # ################################################### -compressed: 500 +compressed: 100 ################################################### # Choice of Minimizer # From 88c123ce916ba7edeae96c923c39f59cc84adc12 Mon Sep 17 00:00:00 2001 From: Radonirinaunimi Date: Wed, 24 Mar 2021 17:07:06 +0100 Subject: [PATCH 4/5] Improve compression of enhanced --- src/pycompressor/compressing.py | 22 +++++++----- src/pycompressor/compressor.py | 4 +-- src/pycompressor/utils.py | 64 +++++++++++++++++++++++++++++---- 3 files changed, 72 insertions(+), 18 deletions(-) diff --git a/src/pycompressor/compressing.py b/src/pycompressor/compressing.py index 9ba475f..40164ac 100644 --- a/src/pycompressor/compressing.py +++ b/src/pycompressor/compressing.py @@ 
-17,6 +17,8 @@ from pycompressor.compressor import Compress from pycompressor.utils import map_index from pycompressor.utils import extract_index +from pycompressor.utils import preprocess_enhanced +from pycompressor.utils import restore_permutation from pycompressor.estimators import ALLOWED_ESTIMATORS @@ -69,7 +71,7 @@ def check_validity(pdfsetting, compressed, gans, est_dic): def check_adiabaticity(pdfsetting, gans, compressed): """ Check whether we are in an adiabatic optimization and if so if it can be performed """ pdf_name = pdfsetting["pdf"] - if pdfsetting.get("existing_enhanced") and not gans.get("enhanced"): + if pdfsetting.get("existing_enhanced") and not gans.get("enhanced"): adiabatic_result = f"{pdf_name}/compress_{pdf_name}_{compressed}_output.dat" if not pathlib.Path(adiabatic_result).exists(): raise CheckError( @@ -135,21 +137,22 @@ def compressing(pdfsetting, compressed, minimizer, est_dic, gans): try: postgan = pdf + "_enhanced" final_result = {"pdfset_name": postgan} - enhanced = PdfSet(postgan, xgrid, Q0, NF).build_pdf() + enhcd_grid = PdfSet(postgan, xgrid, Q0, NF).build_pdf() + processed, pindex, counts = preprocess_enhanced(enhcd_grid) # Shuffled the enhanced PDF grid and save the shuffling # index in order to restore it later. 
shuffled_index = rndgen.choice( - enhanced.shape[0], - enhanced.shape[0], + processed.shape[0], + processed.shape[0], replace=False ) - assert enhanced.shape[0] == shuffled_index.shape[0] - enhanced = enhanced[shuffled_index] + enhanced = processed[shuffled_index] except RuntimeError as excp: raise LoadingEnhancedError(f"{excp}") nb_iter, ref_estimators = 100000, None extr_index = np.array(extract_index(pdf, compressed)) - init_index = map_index(shuffled_index, extr_index) + map_pindex = map_index(pindex, extr_index) + init_index = map_index(shuffled_index, map_pindex) assert extr_index.shape[0] == init_index.shape[0] else: final_result = {"pdfset_name": pdf} @@ -213,7 +216,7 @@ def compressing(pdfsetting, compressed, minimizer, est_dic, gans): # Restore the shuffled index back in case of compression from # an enhanced set if enhanced_already_exists: - index = map_index(shuffled_index, index) + index = restore_permutation(index, shuffled_index, pindex) # Prepare output file final_result["ERFs"] = erf_list @@ -225,7 +228,8 @@ def compressing(pdfsetting, compressed, minimizer, est_dic, gans): console.print(f"\n• Final ERF: [bold red]{erf}.", style="bold red") # Compute final ERFs for the final choosen replicas - final_err_func = comp.final_erfs(index) + samples = enhcd_grid if enhanced_already_exists else enhanced + final_err_func = comp.final_erfs(samples, index) serfile = open(f"{out_folder}/erf_reduced.dat", "a+") serfile.write(f"{compressed}:") serfile.write(json.dumps(final_err_func)) diff --git a/src/pycompressor/compressor.py b/src/pycompressor/compressor.py index 523cc88..b8f3ba8 100644 --- a/src/pycompressor/compressor.py +++ b/src/pycompressor/compressor.py @@ -82,7 +82,7 @@ def all_error_function(self, index): erf_res = self.err_func.compute_all_erf(reduc_rep) return erf_res - def final_erfs(self, index): + def final_erfs(self, enhanced, index): """Compute the final ERF after minimization. 
Parameters @@ -96,7 +96,7 @@ def final_erfs(self, index): Dictionary containing the list of estimators and their respective values. """ - selected_replicas = self.enhanced[index] + selected_replicas = enhanced[index] erfs = self.err_func.compute_all_erf(selected_replicas) return erfs diff --git a/src/pycompressor/utils.py b/src/pycompressor/utils.py index ee84fef..64f46b0 100644 --- a/src/pycompressor/utils.py +++ b/src/pycompressor/utils.py @@ -8,13 +8,30 @@ log = logging.getLogger(__name__) -def remap_index(index, shuffled): - new_idx = [] - for idx in index: - # TODO: Implement exception - pos = np.where(shuffled == idx)[0][0] - new_idx.append(pos) - return np.array(new_idx) +def preprocess_enhanced(enhanced, dec_check=15): + """Pre-process the enhanced set by removing duplicates + in the PDF grid. + + Parameters + ---------- + enhanced: np.array(float) + enhanced PDF grid + + Returns + ------- + tuple(np.array, np.array, np.array) + tuple that returns the pre-processed array, the indices + that are kept and the number of times each array occurred. + """ + + rounded = np.round(enhanced, dec_check) + preprocessed, index, counts = np.unique( + rounded, + axis=0, + return_index=True, + return_counts=True + ) + return preprocessed, index, counts def map_index(refarr, arr): @@ -38,6 +55,39 @@ def map_index(refarr, arr): return np.vectorize(inds.get)(arr) +def restore_permutation(index, shuffle, preprocess): + """Undo the mapping of indices due to the preprocessing + and the shuffling. 
+ + Parameters + ---------- + index: np.array() + array containing the final indices + shuffle: np.array(int) + array containing the permutation + preprocess: np.array(int) + array containing the indices of the pre-processing + + Returns + ------- + np.array(int) + array of index + """ + + undo_shuffle = shuffle[index] + undo_preproc = preprocess[undo_shuffle] + return undo_preproc + + +def remap_index(index, shuffled): + new_idx = [] + for idx in index: + # TODO: Implement exception + pos = np.where(shuffled == idx)[0][0] + new_idx.append(pos) + return np.array(new_idx) + + def extract_estvalues(comp_size): """Extract the result from the prior for a given compressed set (w.r.t the size). From 8d9b0ba0ad2817e17a7f8abfda0a0bd04e3d0a8f Mon Sep 17 00:00:00 2001 From: Radonirinaunimi Date: Thu, 25 Mar 2021 00:23:16 +0100 Subject: [PATCH 5/5] fix printings --- src/pycompressor/compressing.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pycompressor/compressing.py b/src/pycompressor/compressing.py index 40164ac..a462a1e 100644 --- a/src/pycompressor/compressing.py +++ b/src/pycompressor/compressing.py @@ -41,11 +41,11 @@ def splash(): style = Style(color="blue") logo = Table(show_header=True, header_style="bold blue", style=style) - logo.add_column("𝖕𝖞𝕮𝖔𝖒𝖕𝖗𝖊𝖘𝖘𝖔𝖗", justify="center", width=60) + logo.add_column("𝖕𝖞𝕮𝖔𝖒𝖕𝖗𝖊𝖘𝖘𝖔𝖗", justify="center", width=76) logo.add_row("[bold blue]Fast python compressor for PDF replicas.") logo.add_row("[bold blue]https://n3pdf.github.io/pycompressor/") logo.add_row("[bold blue]© N3PDF 2021") - logo.add_row("[bold blue]Authors: Stefano Carrazza, Juan E. Cruz-Martinez, Tanjona R. Rabemananjara") + logo.add_row("[bold blue]Authors: Stefano Carrazza, Juan M. Cruz-Martinez, Tanjona R. 
Rabemananjara") console.print(logo) @@ -176,7 +176,7 @@ def compressing(pdfsetting, compressed, minimizer, est_dic, gans): table.add_row("PDF set name", f"{pdf}") table.add_row("Size of Prior", f"{prior.shape[0] - 1} replicas") if enhanced_already_exists: - table.add_row("Size of enhanced", f"{enhanced.shape[0] - 1} replicas") + table.add_row("Size of enhanced", f"{enhcd_grid.shape[0] - 1} replicas") table.add_row("Size of compression", f"{compressed} replicas") table.add_row("Input energy Q0", f"{Q0} GeV") table.add_row(