From ed1ca2872d1abab46d0dd86f780fbec275bbbe49 Mon Sep 17 00:00:00 2001 From: Artyom Zaporozhets Date: Sat, 20 Sep 2025 17:56:40 +0300 Subject: [PATCH 1/2] #29: added nature catalog --- app/data/model/__init__.py | 4 ---- app/data/model/designation.py | 3 --- app/data/model/helpers.py | 26 -------------------------- app/data/model/icrs.py | 8 -------- app/data/model/interface.py | 4 ---- app/data/model/redshift.py | 3 --- 6 files changed, 48 deletions(-) diff --git a/app/data/model/__init__.py b/app/data/model/__init__.py index af58daea..a399c414 100644 --- a/app/data/model/__init__.py +++ b/app/data/model/__init__.py @@ -1,8 +1,6 @@ from app.data.model.bibliography import Bibliography from app.data.model.designation import DesignationCatalogObject from app.data.model.helpers import ( - CatalogObjectEncoder, - Layer0CatalogObjectDecoder, get_catalog_object_type, new_catalog_object, ) @@ -48,8 +46,6 @@ "Layer2Object", "RawCatalog", "CatalogObject", - "CatalogObjectEncoder", - "Layer0CatalogObjectDecoder", "DesignationCatalogObject", "ICRSCatalogObject", "RedshiftCatalogObject", diff --git a/app/data/model/designation.py b/app/data/model/designation.py index bcdc197b..19f31c0f 100644 --- a/app/data/model/designation.py +++ b/app/data/model/designation.py @@ -19,9 +19,6 @@ def __eq__(self, value: object) -> bool: def from_custom(cls, design: Any) -> Self: return cls(str(design)) - def layer0_data(self) -> dict[str, Any]: - return {"design": self.designation} - @classmethod def aggregate(cls, objects: list[Self]) -> Self: """ diff --git a/app/data/model/helpers.py b/app/data/model/helpers.py index 66226f0c..a667ed07 100644 --- a/app/data/model/helpers.py +++ b/app/data/model/helpers.py @@ -1,32 +1,6 @@ -import json - from app.data.model import designation, icrs, interface, redshift -class CatalogObjectEncoder(json.JSONEncoder): - def default(self, obj): - if not isinstance(obj, interface.CatalogObject): - return json.JSONEncoder.default(self, obj) - - data = 
obj.layer0_data() - data["catalog"] = obj.catalog().value - - return data - - -class Layer0CatalogObjectDecoder(json.JSONDecoder): - def __init__(self, *args, **kwargs): - super().__init__(object_hook=self.object_hook, **kwargs) - - def object_hook(self, obj): - if "catalog" not in obj: - return obj - - catalog = interface.RawCatalog(obj.pop("catalog")) - - return new_catalog_object(catalog, **obj) - - def get_catalog_object_type(catalog: interface.RawCatalog) -> type[interface.CatalogObject]: if catalog == interface.RawCatalog.DESIGNATION: return designation.DesignationCatalogObject diff --git a/app/data/model/icrs.py b/app/data/model/icrs.py index 146cf4ae..d69027cc 100644 --- a/app/data/model/icrs.py +++ b/app/data/model/icrs.py @@ -47,14 +47,6 @@ def from_custom( astronomy.to(e_dec, "deg"), ) - def layer0_data(self) -> dict[str, Any]: - return { - "ra": self.ra, - "dec": self.dec, - "e_ra": self.e_ra, - "e_dec": self.e_dec, - } - def __eq__(self, value: object) -> bool: if not isinstance(value, ICRSCatalogObject): return False diff --git a/app/data/model/interface.py b/app/data/model/interface.py index 83712864..202b5d78 100644 --- a/app/data/model/interface.py +++ b/app/data/model/interface.py @@ -49,10 +49,6 @@ def aggregate(cls, objects: list[Self]) -> Self: def catalog(self) -> RawCatalog: pass - @abc.abstractmethod - def layer0_data(self) -> dict[str, Any]: - pass - @classmethod @abc.abstractmethod def from_custom(cls, **kwargs) -> Self: diff --git a/app/data/model/redshift.py b/app/data/model/redshift.py index 8647b96c..d7ed3b8b 100644 --- a/app/data/model/redshift.py +++ b/app/data/model/redshift.py @@ -40,9 +40,6 @@ def from_custom( return cls(data_cz, data_e_cz) - def layer0_data(self) -> dict[str, Any]: - return {"cz": self.cz, "e_cz": self.e_cz} - @classmethod def aggregate(cls, objects: list[Self]) -> Self: e_cz = [obj.e_cz for obj in objects] From db60aecc2d2f89aee9772400acac5c2afb347227 Mon Sep 17 00:00:00 2001 From: Artyom Zaporozhets Date: Fri, 
26 Sep 2025 09:48:26 +0300 Subject: [PATCH 2/2] object nature catalog --- app/data/model/__init__.py | 2 + app/data/model/designation.py | 20 ++---- app/data/model/errors.py | 2 + app/data/model/helpers.py | 19 ++++-- app/data/model/icrs.py | 3 +- app/data/model/interface.py | 4 +- app/data/model/nature.py | 72 ++++++++++++++++++++ app/data/model/redshift.py | 3 +- app/data/repositories/layer2/repository.py | 6 -- app/lib/storage/enums.py | 10 +++ app/lib/storage/postgres/postgres_storage.py | 1 + postgres/migrations/V012__object_nature.sql | 22 ++++++ 12 files changed, 132 insertions(+), 32 deletions(-) create mode 100644 app/data/model/errors.py create mode 100644 app/data/model/nature.py create mode 100644 postgres/migrations/V012__object_nature.sql diff --git a/app/data/model/__init__.py b/app/data/model/__init__.py index a399c414..7310edcd 100644 --- a/app/data/model/__init__.py +++ b/app/data/model/__init__.py @@ -7,6 +7,7 @@ from app.data.model.icrs import ICRSCatalogObject from app.data.model.interface import CatalogObject, MeasuredValue, RawCatalog, get_object from app.data.model.layer2 import Layer2CatalogObject, Layer2Object +from app.data.model.nature import NatureCatalogObject from app.data.model.records import ( CIResult, CIResultObjectCollision, @@ -49,6 +50,7 @@ "DesignationCatalogObject", "ICRSCatalogObject", "RedshiftCatalogObject", + "NatureCatalogObject", "get_catalog_object_type", "new_catalog_object", "HomogenizationRule", diff --git a/app/data/model/designation.py b/app/data/model/designation.py index 19f31c0f..53342ca1 100644 --- a/app/data/model/designation.py +++ b/app/data/model/designation.py @@ -1,4 +1,5 @@ import re +import statistics from typing import Any, Self, final from app.data.model import interface @@ -21,23 +22,10 @@ def from_custom(cls, design: Any) -> Self: @classmethod def aggregate(cls, objects: list[Self]) -> Self: - """ - Aggregate designation is selected as the most common designation among all objects. 
- """ - name_counts = {} + return cls(statistics.mode([obj.designation for obj in objects])) - for obj in objects: - name_counts[obj.designation] = name_counts.get(obj.designation, 0) + 1 - - max_name = "" - - for name, count in name_counts.items(): - if count > name_counts.get(max_name, 0): - max_name = name - - return cls(max_name) - - def catalog(self) -> interface.RawCatalog: + @classmethod + def catalog(cls) -> interface.RawCatalog: return interface.RawCatalog.DESIGNATION @classmethod diff --git a/app/data/model/errors.py b/app/data/model/errors.py new file mode 100644 index 00000000..2a37d9c5 --- /dev/null +++ b/app/data/model/errors.py @@ -0,0 +1,2 @@ +class CatalogObjectCreationError(Exception): + pass diff --git a/app/data/model/helpers.py b/app/data/model/helpers.py index a667ed07..83213fd3 100644 --- a/app/data/model/helpers.py +++ b/app/data/model/helpers.py @@ -1,13 +1,18 @@ -from app.data.model import designation, icrs, interface, redshift +from app.data.model import designation, icrs, interface, nature, redshift + +ALLOWED_CATALOG_OBJECTS = [ + designation.DesignationCatalogObject, + icrs.ICRSCatalogObject, + redshift.RedshiftCatalogObject, + nature.NatureCatalogObject, +] + +catalog_to_objtype = {t.catalog(): t for t in ALLOWED_CATALOG_OBJECTS} def get_catalog_object_type(catalog: interface.RawCatalog) -> type[interface.CatalogObject]: - if catalog == interface.RawCatalog.DESIGNATION: - return designation.DesignationCatalogObject - if catalog == interface.RawCatalog.ICRS: - return icrs.ICRSCatalogObject - if catalog == interface.RawCatalog.REDSHIFT: - return redshift.RedshiftCatalogObject + if catalog in catalog_to_objtype: + return catalog_to_objtype[catalog] raise ValueError(f"Unknown catalog: {catalog}") diff --git a/app/data/model/icrs.py b/app/data/model/icrs.py index d69027cc..56eb2917 100644 --- a/app/data/model/icrs.py +++ b/app/data/model/icrs.py @@ -71,7 +71,8 @@ def aggregate(cls, objects: list[Self]) -> Self: return cls(ra, dec, e_ra, 
e_dec) - def catalog(self) -> interface.RawCatalog: + @classmethod + def catalog(cls) -> interface.RawCatalog: return interface.RawCatalog.ICRS def layer1_data(self) -> dict[str, Any]: diff --git a/app/data/model/interface.py b/app/data/model/interface.py index 202b5d78..c0e2ea98 100644 --- a/app/data/model/interface.py +++ b/app/data/model/interface.py @@ -33,6 +33,7 @@ class RawCatalog(enum.Enum): ICRS = "icrs" DESIGNATION = "designation" REDSHIFT = "redshift" + NATURE = "nature" class CatalogObject(abc.ABC): @@ -45,8 +46,9 @@ class CatalogObject(abc.ABC): def aggregate(cls, objects: list[Self]) -> Self: pass + @classmethod @abc.abstractmethod - def catalog(self) -> RawCatalog: + def catalog(cls) -> RawCatalog: pass @classmethod diff --git a/app/data/model/nature.py b/app/data/model/nature.py new file mode 100644 index 00000000..eca55b0a --- /dev/null +++ b/app/data/model/nature.py @@ -0,0 +1,72 @@ +import statistics +from typing import Any, Self, final + +from app.data.model import errors, interface +from app.lib.storage import enums + +# all options are lowercase since we will .lower() everything anyway +options = { + enums.Nature.STAR: ["*", "s", "star"], + enums.Nature.STAR_SYSTEM: ["*s", "**", "stars", "c", "s2", "s3", "a", "s+", "gc", "oc"], + enums.Nature.INTERSTELLAR_MEDIUM: ["ism"], + enums.Nature.GALAXY: ["g", "gal", "galaxy"], + enums.Nature.MULTIPLE_GALAXIES: ["mg", "m2", "m3", "mc"], + enums.Nature.OTHER: ["o"], + enums.Nature.ERROR: ["!", "e", "x", "pg", "u"], +} + +option_to_nature: dict[str, enums.Nature] = {} +for nature, names in options.items(): + for name in names: + option_to_nature[name] = nature + + +@final +class NatureCatalogObject(interface.CatalogObject): + def __init__(self, nature: enums.Nature) -> None: + self.nature = nature + + @classmethod + def from_custom(cls, nature: str) -> Self: + if (n := nature.lower()) in option_to_nature: + return cls(option_to_nature[n]) + + raise errors.CatalogObjectCreationError(f"Unknown object type: 
{nature}") + + @classmethod + def aggregate(cls, objects: list[Self]) -> Self: + return cls(statistics.mode([obj.nature for obj in objects])) + + @classmethod + def catalog(cls) -> interface.RawCatalog: + return interface.RawCatalog.NATURE + + @classmethod + def layer1_table(cls) -> str: + return "nature.data" + + @classmethod + def layer1_keys(cls) -> list[str]: + return ["nature"] + + def layer1_data(self) -> dict[str, Any]: + return {"nature": self.nature} + + @classmethod + def from_layer1(cls, data: dict[str, Any]) -> Self: + return cls(nature=data["nature"]) + + @classmethod + def layer2_table(cls) -> str: + return "layer2.nature" + + @classmethod + def layer2_keys(cls) -> list[str]: + return ["nature"] + + def layer2_data(self) -> dict[str, Any]: + return {"nature": self.nature} + + @classmethod + def from_layer2(cls, data: dict[str, Any]) -> Self: + return cls(nature=data["nature"]) diff --git a/app/data/model/redshift.py b/app/data/model/redshift.py index d7ed3b8b..97513c92 100644 --- a/app/data/model/redshift.py +++ b/app/data/model/redshift.py @@ -49,7 +49,8 @@ def aggregate(cls, objects: list[Self]) -> Self: return cls(cz, e_cz) - def catalog(self) -> interface.RawCatalog: + @classmethod + def catalog(cls) -> interface.RawCatalog: return interface.RawCatalog.REDSHIFT @classmethod diff --git a/app/data/repositories/layer2/repository.py b/app/data/repositories/layer2/repository.py index 6382f432..b7e343a9 100644 --- a/app/data/repositories/layer2/repository.py +++ b/app/data/repositories/layer2/repository.py @@ -12,12 +12,6 @@ from app.lib import containers from app.lib.storage import postgres -catalogs = [ - model.RawCatalog.ICRS, - model.RawCatalog.DESIGNATION, - model.RawCatalog.REDSHIFT, -] - class Layer2Repository(postgres.TransactionalPGRepository): def __init__(self, storage: postgres.PgStorage, logger: structlog.stdlib.BoundLogger) -> None: diff --git a/app/lib/storage/enums.py b/app/lib/storage/enums.py index 3a0bc862..56d95982 100644 --- 
a/app/lib/storage/enums.py +++ b/app/lib/storage/enums.py @@ -31,3 +31,13 @@ class RecordCrossmatchStatus(str, enum.Enum): NEW = "new" COLLIDED = "collided" EXISTING = "existing" + + +class Nature(enum.Enum): + STAR = "*" + STAR_SYSTEM = "*S" + INTERSTELLAR_MEDIUM = "ISM" + GALAXY = "G" + MULTIPLE_GALAXIES = "MG" + OTHER = "O" + ERROR = "X" diff --git a/app/lib/storage/postgres/postgres_storage.py b/app/lib/storage/postgres/postgres_storage.py index 8766ddc0..98a25752 100644 --- a/app/lib/storage/postgres/postgres_storage.py +++ b/app/lib/storage/postgres/postgres_storage.py @@ -38,6 +38,7 @@ def dump(self, obj: Any) -> bytes | bytearray | memoryview: (enums.DataType, "common.datatype"), (enums.RawDataStatus, "rawdata.status"), (enums.RecordCrossmatchStatus, "rawdata.crossmatch_status"), + (enums.Nature, "nature.status"), ] diff --git a/postgres/migrations/V012__object_nature.sql b/postgres/migrations/V012__object_nature.sql new file mode 100644 index 00000000..61252633 --- /dev/null +++ b/postgres/migrations/V012__object_nature.sql @@ -0,0 +1,22 @@ +CREATE SCHEMA IF NOT EXISTS nature; + +CREATE TYPE nature.status AS ENUM( + '*', + '*S', + 'ISM', + 'G', + 'MG', + 'O', + 'X' +); + +CREATE TABLE nature.data ( + object_id text PRIMARY KEY REFERENCES rawdata.objects (id), + nature nature.status NOT NULL, + modification_time timestamp without time zone NOT NULL DEFAULT NOW() +); + +CREATE TABLE layer2.nature ( + pgc integer PRIMARY KEY, + nature nature.status NOT NULL +);