diff --git a/files/ftp-export/genome_coordinates/known-coordinates.sql b/files/ftp-export/genome_coordinates/known-coordinates.sql index bdbef5bc3..6e06fc488 100644 --- a/files/ftp-export/genome_coordinates/known-coordinates.sql +++ b/files/ftp-export/genome_coordinates/known-coordinates.sql @@ -4,4 +4,5 @@ select distinct assembly.ensembl_url, assembly.taxid from ensembl_assembly assembly +where assembly.selected_genome = true ) TO STDOUT CSV diff --git a/files/genes/species.sql b/files/genes/species.sql index c6383d65d..3b3337912 100644 --- a/files/genes/species.sql +++ b/files/genes/species.sql @@ -2,4 +2,5 @@ COPY ( select distinct assembly_id, taxid from ensembl_assembly + where selected_genome = true ) TO STDOUT CSV diff --git a/files/genome-mapping/find_species.sql b/files/genome-mapping/find_species.sql index c87f6e67d..7c8476ba8 100644 --- a/files/genome-mapping/find_species.sql +++ b/files/genome-mapping/find_species.sql @@ -7,4 +7,5 @@ COPY ( FROM ensembl_assembly WHERE division NOT IN ('EnsemblProtists', 'EnsemblFungi') + AND selected_genome = true ) TO STDOUT CSV; diff --git a/files/import-data/post-release/001__coordinate-systems.sql b/files/import-data/post-release/001__coordinate-systems.sql index fd5d7dd6e..95546630a 100644 --- a/files/import-data/post-release/001__coordinate-systems.sql +++ b/files/import-data/post-release/001__coordinate-systems.sql @@ -17,6 +17,7 @@ SELECT load.karyotype_rank FROM load_coordinate_info load JOIN ensembl_assembly ensembl ON ensembl.assembly_id = load.assembly_id +WHERE ensembl.selected_genome = true ) ON CONFLICT (chromosome, assembly_id) DO UPDATE SET diff --git a/files/import-data/post-release/001__ensembl-pseudogenes.sql b/files/import-data/post-release/001__ensembl-pseudogenes.sql index 6aa064e1f..786b7a7db 100644 --- a/files/import-data/post-release/001__ensembl-pseudogenes.sql +++ b/files/import-data/post-release/001__ensembl-pseudogenes.sql @@ -25,6 +25,7 @@ from load_ensembl_pseudogenes load join ensembl_assembly assem on assem.assembly_id = load.assembly_id +where assem.selected_genome = true ) ON CONFLICT (md5(region_name)) DO NOTHING; INSERT INTO ensembl_pseudogene_exons ( diff --git a/files/import-data/post-release/001__locations.sql b/files/import-data/post-release/001__locations.sql index a1d1fe72f..5252fccab 100644 --- a/files/import-data/post-release/001__locations.sql +++ b/files/import-data/post-release/001__locations.sql @@ -27,6 +27,8 @@ on assembly.assembly_id = load.assembly_id WHERE load.chromosome is not null +AND + assembly.selected_genome = true ON CONFLICT (accession, name, local_start, local_end, assembly_id) DO NOTHING ; diff --git a/files/repeats/find-assemblies.sql b/files/repeats/find-assemblies.sql index b98f76e20..3c1f91d85 100644 --- a/files/repeats/find-assemblies.sql +++ b/files/repeats/find-assemblies.sql @@ -7,5 +7,5 @@ FROM ensembl_assembly species WHERE exists(select 1 from rnc_sequence_regions reg where reg.assembly_id = species.assembly_id) and species.division != 'EnsemblFungi' + and species.selected_genome = true ) TO STDOUT CSV; - diff --git a/workflows/databases/mirgenedb.nf b/workflows/databases/mirgenedb.nf index 7277a017e..bd88aea98 100644 --- a/workflows/databases/mirgenedb.nf +++ b/workflows/databases/mirgenedb.nf @@ -7,7 +7,7 @@ process mirgenedb { """ scp $params.databases.mirgenedb.remote mirgenedb.json psql \ - --command='COPY (select assembly_id,assembly_ucsc from ensembl_assembly where assembly_ucsc is not null) TO STDOUT (FORMAT CSV)' \ + --command='COPY (select assembly_id,assembly_ucsc from ensembl_assembly where assembly_ucsc is not null and selected_genome = true) TO STDOUT (FORMAT CSV)' \ "$PGDATABASE" > assemblies.tsv rnac mirgenedb parse assemblies.tsv mirgenedb.json . """