Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 119 additions & 0 deletions schemas/bedbase/output_fairtrack.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
# bedboss fairtrack output schema.
# Validates a top-level `samples` array; every item is an object describing
# one BED track with the properties listed below.
# NOTE(review): `sample_type` is assumed to contain exactly cell_type,
# abnormal_cell_type, cell_line and organism_part; all other properties sit at
# the sample level, as the `required` list implies - confirm.
description: bedboss fairtrack output schema

properties:
  samples:
    type: array
    items:
      type: object
      properties:
        # --- sample identity and origin ---
        local_id:
          type: string
          description: "Name of the bed sample (digest)"
        global_id:
          type: string
          description: "Unique sample identifier of the bed source (e.g. 'encode:ENCBS192PUU', 'geo:GSM1234')"
        species_id:
          type: string
          description: "Taxonomy id based on NCBI database (e.g. taxonomy:9606)"
        species_name:
          type: string
          description: "organism name. (e.g. homo sapiens)"
        biospecimen_class:
          type: string
          description: "(e.g. Cell)"
        # Nested object; none of its members is individually required.
        sample_type:
          type: object
          properties:
            cell_type:
              type: string
              description: "cell type (e.g. K562)"
            abnormal_cell_type:
              type: string
              # TODO: placeholder description, still to be written.
              description: "..."
            cell_line:
              type: string
              description: "cell line (e.g. C4-2B)"
            organism_part:
              type: string
              description: "(e.g. liver)"

        # --- experiment ---
        technique:
          type: string
          description: "Main technique used in experiment (e.g., laboratory, computational or statistical technique)"
        target:
          type: string
          description: "Main target of the experiment (e.g. H3K4_trimethylation)"
        study_reference:
          type: string
          description: "Reference identifier of the study"
        gene_id:
          type: string
          description: "HGNC identifier for gene targeted by the experiment (e.g., specific transcription factor used as ChIP-seq antibody)."
        gene_product_type:
          type: string
          description: "Gene product type targeted by the experiment (e.g., miRNA)"
        macromolecular_structure:
          type: string
          description: "Macromolecular structure targeted by the experiment (e.g., chromatin structure)"
        lab_protocol_description:
          type: string
          description: "Free-text description of lab protocol, or URL to such description"
        compute_protocol_description:
          type: string
          description: "Free-text description of computational protocol, or URL to such description"

        # --- track / file ---
        assembly_id:
          type: string
          description: "Genome assembly identifier. e.g. insdc.gca:GCF_000001405.26"
        assembly_name:
          type: string
          description: "Genome assembly name e.g. GRCh38"
        experiment_ref:
          type: string
          description: "Reference to the experiment of the track (e.g., encode:ENCSR000DQP)"
        file_url:
          type: string
          description: "URL to the file"
        file_name:
          type: string
          description: "Name of the file (e.g., ENCFF000VZC.bed.gz)"
        label_short:
          type: string
          description: "A short label of the track file. Suggested maximum length is 25 characters"
        label_long:
          type: string
          description: "A long label of the track file. Suggested maximum length is 80 characters"
        file_format:
          type: string
          # Single-quoted because the text itself contains double quotes.
          description: 'File format (e.g., "bed", "narrowPeak", "broadPeak")'
        type_of_condensed_data:
          type: string
          description: '(e.g. "Narrow peaks")'
        geometric_track_type:
          type: string
          description: "(e.g. Segments)"
        checksum:
          type: string
          description: "Method of checksum generation. (eg. MD5)"

      # Properties every sample object must provide.
      required:
        - local_id
        - species_id
        - biospecimen_class
        - sample_type
        - technique
        - target
        - study_reference
        - assembly_id
        - assembly_name
        - experiment_ref
        - file_url
        - label_short
        - label_long
        - file_format
        - type_of_condensed_data
        - geometric_track_type
        - checksum

# The document itself must carry the `samples` array.
required:
  - samples
81 changes: 81 additions & 0 deletions schemas/bedbase/output_schema.yaml
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a PEPhub output schema describing the minimal columns needed for PEPhub.

BED Attribute Standardizer Output Schema (BASOS)

Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# bedboss output schema.
# Validates a top-level `samples` array; every item is an object describing
# one BED sample with the properties listed below.
description: bedboss output schema

properties:
  samples:
    type: array
    items:
      type: object
      properties:
        sample_name:
          type: string
          description: "Name of the bed sample (digest)"
        assembly_name:
          type: string
          description: "organism genome code (e.g. hg19, mm10)"
        assembly_id:
          type: string
          description: "Genome assembly identifier - from refgenie"
        bed_type:
          type: string
          description: "bed type (e.g. bed3 / bed6 / bed9+3)"
          # `bed`, a column count from 3 to 15, then optionally `+` and at
          # most one digit. Single-quoted so the backslash is passed to the
          # regex engine literally (`\+` is not a valid escape inside a
          # double-quoted YAML scalar). A plain `?` is used instead of the
          # possessive `?+`, which many regex dialects reject; the accepted
          # strings are the same because `$` follows immediately.
          # NOTE(review): a bare trailing `+` (e.g. `bed3+`) is accepted and a
          # multi-digit suffix (e.g. `bed6+10`) is not - confirm this is
          # the intended set.
          pattern: '^bed(?:[3-9]|1[0-5])(?:\+|$)[0-9]?$'
        format_type:
          type: string
          description: "whether the regions are narrow (transcription factor implies narrow, histone mark implies broad peaks)"
          enum:
            - "narrowpeak"
            - "broadpeak"
            - "bed"
        organism:
          type: string
          description: "organism name (e.g. Homo sapiens)"
        species_id:
          type: string
          description: "Taxonomy id based on NCBI database (e.g. taxonomy:9606)"
        cell_type:
          type: string
          description: "cell type (e.g. K562)"
        cell_line:
          type: string
          description: "cell line (e.g. C4-2B)"
        exp_protocol:
          type: string
          description: "Molecular biology technique that was used in the experiment (e.g. DNase-seq, TF ChIP-seq, histone ChIP-seq, ATAC-seq)"
        library_source:
          type: string
          description: "(e.g. genomic DNA, DNA, mRNA)"
        target:
          type: string
          description: "The target of the experiment (e.g. H3K36me3)"
        antibody:
          type: string
          description: "antibody used in the experiment (anti-H3K36me3)"
        tissue:
          type: string
          description: "Tissue or organ used in the experiment (e.g blood / liver / brain)"
        treatment:
          type: string
          description: "Treatment applied to the sample"
        global_sample_id:
          type: string
          description: "Unique sample identifier of the bed source (e.g. 'encode:ENCBS192PUU', 'geo:GSM1234')"
        global_experiment_id:
          type: string
          description: "Unique experiment identifier of the bed source (e.g. 'encode:ENCSR000EOT', 'geo:GSE1234')"
        # A sample property that happens to be named `description`; the nested
        # `description` key below is its schema annotation.
        description:
          type: string
          description: "freeform description of the sample"
        file_url:
          type: string
          description: "URL to the bed file"
        file_name:
          type: string
          description: "Name of the bed file"

      # Properties every sample object must provide. Each name here is
      # declared under `properties`; `assembly_name` is the genome code
      # (e.g. hg19, mm10).
      required:
        - sample_name
        - assembly_name
        - bed_type
        - format_type
        - file_url
        - file_name

# The document itself must carry the `samples` array.
required:
  - samples