from dataclasses import dataclass, field
import uuid
from shapely.geometry import Point, LineString, Polygon, MultiLineString, MultiPolygon
import shapely
import json
from typing import Optional, Self
from datetime import datetime
import pandas as pd
from .TAEnums import *
# Type aliases for improved code readability and type hints.
# NOTE: the right-hand side of a `type` statement (PEP 695, Python 3.12+) is evaluated
# lazily, which is what allows the aliases below to name classes (Observation, Dataset, ...)
# that are only defined further down in this module.
type GeometryType = Point | LineString | Polygon | MultiLineString | MultiPolygon
"""Union type for all supported Shapely geometry types in the Time Atlas data model."""
# Not the same thing as the standard library's `uuid.UUID` class: this alias is a plain `str`.
type UUID = str
"""String representation of a Universal Unique Identifier (UUID)."""
# Each *Reference alias below accepts either the entity object itself or its UUID string.
type ObsReference = Observation | UUID
"""Reference to an Observation entity, either as an object or UUID string."""
type HRReference = HistoricalRecord | UUID
"""Reference to a HistoricalRecord entity, either as an object or UUID string."""
type DatasetReference = Dataset | UUID
"""Reference to a Dataset entity, either as an object or UUID string."""
type POIReference = PointOfInterest | UUID
"""Reference to a PointOfInterest entity, either as an object or UUID string."""
type GeometryReference = Geometry | UUID
"""Reference to a Geometry entity, either as an object or UUID string."""
type AreaReference = Area | UUID
"""Reference to an Area entity, either as an object or UUID string."""
type LayerReference = Layer | UUID
"""Reference to a Layer entity, either as an object or UUID string."""
type MapReference = Map | UUID
"""Reference to a Map entity, either as an object or UUID string."""
class UUIDManager():
"""Utility class for managing UUID generation and parsing in the Time Atlas data model.
Provides methods for generating deterministic UUIDs based on dataset namespaces
and parsing UUIDs from various formats (e.g., URLs). Ensures consistent handling
of unique identifiers across all RDE entities.
"""
namespace: UUID
def __init__(self, namespace):
match namespace:
case str():
if namespace.startswith('http'):
self.namespace = uuid.uuid5(uuid.NAMESPACE_URL, namespace)
else:
raise ValueError(f"Invalid namespace string: {namespace}. Must be a URL starting with 'http'.")
case uuid.UUID():
self.namespace = namespace
case _:
raise ValueError(f"Invalid namespace type: {type(namespace)}. Must be a string URL or a uuid.UUID object.")
@staticmethod
def generate_uuid(namespace: Optional[uuid.UUID | str], value: Optional[str] = None) -> UUID:
if namespace:
manager = UUIDManager(namespace)
return manager._generate_uuid(value)
else:
return str(uuid.uuid4())
@staticmethod
def is_valid_uuid(uuid_string: str) -> bool:
'''
Checks whether the string given as argument is a valid UUID format.
Returns True if the string is a valid UUID (e.g., "80d80427-b711-5324-b1d1-4eeddb059269"), False otherwise.
'''
try:
uuid.UUID(uuid_string)
return True
except (ValueError, AttributeError, TypeError):
return False
def _generate_uuid(self, value: Optional[str] = None) -> UUID:
"""Generate a deterministic UUID based on the namespace and value.
Uses UUIDv5 algorithm to create a unique identifier that is consistent
across different runs given the same namespace and value.
Args:
value: Optional string value to generate the UUID from (e.g., a specific identifier from the data)
Returns:
A UUID string generated from the namespace and value
"""
if value is None:
value = str(uuid.uuid4()) # fallback to random UUID if no value provided
return str(uuid.uuid5(self.namespace, value))
[docs]
@dataclass
class UUIDEntity:
"""Base class for entities that have a unique identifier.
All Research Data Entities (RDE) in the Time Atlas data model have a unique UUID
generated from a UUIDv5 algorithm with a custom dataset-based namespace and a
deterministic identifier from the data.
Attributes:
id: Universal unique identifier of the resource when provided to the constructor, it can be either a valid UUID string, a uuid.UUID object, or a tuple of (UUIDManager, value) to generate a deterministic UUID based on the manager's namespace and the provided value. If no ID is provided, a random UUID will be generated.
"""
id: Optional[UUID | str | tuple[UUIDManager, str]]
def __post_init__(self):
match self.id:
case str():
if not UUIDManager.is_valid_uuid(self.id):
raise ValueError(f"Invalid UUID string: \"{self.id}\". Must be a valid UUID format (e.g., \"80d80427-b711-5324-b1d1-4eeddb059269\").")
case uuid.UUID():
self.id = str(self.id)
case (UUIDManager(), str() as value):
# this ensure that there is no collision of UUIDs across different RDE types, as the value is prefixed by the class name of the entity, and the namespace is the same for all entities of a dataset, so there will be no collision between different datasets either, as they have different namespaces.
current_class_name = self.__class__.__name__.lower()
value = f"{current_class_name}_{value}"
self.id = self.id[0]._generate_uuid(value)
case None:
self.id = UUIDManager.generate_uuid(None) # generate random UUID if no ID provided
case _:
raise ValueError(f"Invalid ID type: {type(self.id)}. Must be a valid UUID string, a uuid.UUID object, or None.")
[docs]
def get_ref(self) -> str:
"""Return the UUID reference of this entity.
Returns:
The UUID string of this entity
"""
return self.id
[docs]
@classmethod
def parse_uuid(cls, data_id: str) -> None:
"""Parse a UUID from various formats.
In the API, unique IDs are often represented as URLs, but in the data model
we keep only the UUID part. This method extracts the UUID from URL format.
Args:
data_id: The ID to parse, either as a UUID string or URL containing UUID
Returns:
The parsed UUID string
"""
# in the API, the unique id is often represented as a URL, but in the data model we want to keep only the UUID part, so we parse it here.
if data_id.startswith('http'):
return data_id.split('/')[-1]
return data_id
[docs]
@dataclass
class RDE:
"""Base class for all Research Data Entities (RDE) in the Time Atlas data model.
Provides common serialization and type handling methods for all RDE types.
All RDE instances are related using numerical identifiers (UUIDs), which are
transformed into URLs when ingested in the backend.
"""
[docs]
def to_dict(self, exclude_fields = {}) -> dict:
"""Convert the RDE instance to a dictionary representation.
Handles proper serialization of nested RDE objects, enums, and special types
like MultiLingualValue and RDETimeRange. Automatically adds the rde_type field
for main RDE entities.
Args:
exclude_fields: Set of field names to exclude from the output dictionary
Returns:
Dictionary representation of the RDE instance
"""
result = {}
for field_name, field_value in self.__dict__.items():
if field_name in exclude_fields:
continue
match field_value:
case UUIDEntity():
result[field_name] = field_value.get_ref()
case RDE():
# works for dataset configuration as well, as it inherits from RDE, but does not have rde_type field, so it will not be added in the final dict
result[field_name] = field_value.to_dict()
case RDETimeRange():
result['start_time'] = field_value.start_time
result['end_time'] = field_value.end_time
case HeightInfo():
result['terrain_height'] = field_value.terrain
result['building_height'] = field_value.building
case list():
result[field_name] = [item.get_ref() if isinstance(item, RDE) else item for item in field_value]
case Enum():
result[field_name] = field_value.value
case MultiLingualValue():
result[field_name] = field_value.values
case _:
result[field_name] = field_value
# dataset configuration and other special cases have no rde_type field, only doing it for main RDE types
rde_name = self.__class__.__name__.lower()
if rde_name in CLASS_NAME_TO_RDE:
result['rde_type'] = CLASS_NAME_TO_RDE[rde_name].value
return result
[docs]
def get_type(self) -> Optional[RDEType]:
"""Get the RDE type enum value for this entity.
Returns:
The RDEType enum value or None if not a main RDE type
"""
rde_name = self.__class__.__name__.lower()
return CLASS_NAME_TO_RDE.get(rde_name).value
[docs]
@classmethod
def constructor_from_json_obj(cls, json_obj: dict) -> Self:
"""Construct an RDE instance from a JSON object.
This method should be implemented in subclasses to handle specific
deserialization logic for each RDE type.
Args:
json_obj: Dictionary containing the RDE data
Returns:
An instance of the RDE subclass
Raises:
NotImplementedError: If not implemented in subclass
"""
raise NotImplementedError('This method should be implemented in subclasses')
@dataclass
class RDETimeRange:
    """Represents a temporal range for RDE entities.

    Datetime values formatted as ISO 8601 strings representing the range of time
    existence for an RDE, denoting the starting and ending points. Used by
    Historical Records, Datasets, Maps, and Layers.

    Attributes:
        start_time: ISO 8601 formatted datetime string for the start of existence
        end_time: ISO 8601 formatted datetime string for the end of existence

    Raises:
        ValueError: If datetime format is invalid
        AssertionError: If start_time is greater than end_time
    """

    start_time: str
    end_time: str

    def __post_init__(self):
        # Parsing doubles as format validation (ValueError on bad input). A trailing
        # "Z" is rewritten because older fromisoformat() versions do not accept it.
        start = datetime.fromisoformat(self.start_time.replace("Z", "+00:00"))
        end = datetime.fromisoformat(self.end_time.replace("Z", "+00:00"))
        try:
            # Compare instants, not text: string comparison is wrong as soon as the
            # two values use different UTC offsets or "Z" versus "+00:00".
            in_order = start <= end
        except TypeError:
            # One value carries a UTC offset and the other does not, so they cannot
            # be compared as datetimes; fall back to comparing the raw strings.
            in_order = self.start_time <= self.end_time
        if not in_order:
            # Raised explicitly (not via `assert`) so the check survives `python -O`.
            raise AssertionError("start_time must be less than or equal to end_time")
@dataclass
class MultiLingualValue:
    """Text available in several languages, shaped like IIIF language maps.

    The same piece of text can be given in any number of languages, and every
    language may carry more than one value.

    Attributes:
        values: Mapping from a language code (2-3 letters) to the list of texts
            in that language, for instance
            {"en": ["English text"], "fr": ["Texte français"]}
    """

    values: dict[str, list[str]]
@dataclass
class DatasetConfiguration(RDE):
    """Operational configuration for a Dataset.

    Describes how the Historical Records in a dataset should be handled and
    served through the information system. Includes metadata field configurations
    and display settings for the frontend.

    Attributes:
        metadata_field_config: List of configurations for each metadata field in HRs
        main_label: Formatting string indicating which metadata to use for main label
        sub_label: Formatting string for potential sub-label display
        display_thumbnail: Whether HRs have thumbnails that should be displayed
        external_source: Whether source button should forward to external URL
    """

    metadata_field_config: list[MetadataFieldConfig] = field(default_factory=list)
    main_label: str = ''
    sub_label: str = ''
    display_thumbnail: bool = False
    external_source: bool = False

    @classmethod
    def constructor_from_json_obj(cls, json_obj: dict) -> Self:
        """Construct a DatasetConfiguration from a JSON object.

        The field configurations are read from the top-level
        'metadata_field_config' key, the display settings from the nested
        'dataset_config' object. Missing or null entries fall back to defaults.

        Args:
            json_obj: Dictionary containing dataset configuration data

        Returns:
            DatasetConfiguration instance
        """
        # `or` also covers keys that are present but null in the JSON
        field_configs = json_obj.get('metadata_field_config') or []
        display_settings = json_obj.get('dataset_config') or {}
        return cls(
            metadata_field_config=[MetadataFieldConfig.constructor_from_json_obj(v) for v in field_configs],
            main_label=display_settings.get('main_label', ''),
            sub_label=display_settings.get('sub_label', ''),
            display_thumbnail=display_settings.get('display_thumbnail', False),
            external_source=display_settings.get('external_source', False)
        )

    def to_dict(self, exclude_fields=frozenset()):
        """Convert to dictionary representation.

        The instance is left untouched, so the method can be called repeatedly.

        Args:
            exclude_fields: Collection of field names to exclude

        Returns:
            Dictionary representation with serialized metadata field configs
        """
        excluded = set(exclude_fields)
        # The field configs are serialized here rather than by the generic list
        # handling of the base class, hence they are withheld from the super call.
        result = super().to_dict(exclude_fields=excluded | {'metadata_field_config'})
        if 'metadata_field_config' not in excluded:
            result['metadata_field_config'] = [
                config.to_dict() if hasattr(config, 'to_dict') else config
                for config in (self.metadata_field_config or [])
            ]
        return result
[docs]
@dataclass
class Dataset(RDE, UUIDEntity):
"""A homogeneous collection of information ingested in the Time Machine system.
Represents the link between research data and its numerical expression and
exploitation. Allows users to access meta/paradata on the dataset level.
The entity is tied to operational configuration describing how the entities
forming the dataset should be handled and served.
Attributes:
id: Universal unique identifier of the dataset
slug: Ad-hoc label identifying the dataset (human-readable)
name: Multilingual short title displayed as header in the frontend
time_range: Temporal range of the dataset's existence
creation_time: Timestamp indicating when this version was created
version: Version label formatted as "X.Y.Z" (major.minor.patch)
sources: List of IIIF manifest UUIDs used to produce the dataset
has_areas: References to areas the dataset is related to
configuration: Metadata configuration for HRs from this dataset
metadata: List of free-form metadata fields for contextual information
hrs: List of Historical Records in this dataset (processing only)
obs: List of Observations in this dataset (processing only)
"""
slug: str
name: MultiLingualValue
time_range: RDETimeRange
creation_time: Optional[str] = None
version: Optional[str] = None
sources: list[str] = field(default_factory=list)
has_areas: Optional[list[AreaReference]] = field(default_factory=list)
configuration: DatasetConfiguration = field(default_factory=DatasetConfiguration)
metadata: list[FreeFormMetadata] = field(default_factory=list)
# fields that do not exist in the RDE data model, only there to make python processing easier:
hrs: list['HistoricalRecord'] = field(default_factory=list)
obs: list['Observation'] = field(default_factory=list)
[docs]
@classmethod
def constructor_from_json_obj(cls, json_obj: dict) -> Self:
"""Construct a Dataset from a JSON object.
Args:
json_obj: Dictionary containing dataset data
Returns:
Dataset instance
"""
config_data = json_obj.get('configuration')
configuration = DatasetConfiguration.constructor_from_json_obj(config_data) if config_data else None
return cls(
id=UUIDEntity.parse_uuid(json_obj['id']),
slug=json_obj['slug'],
name=MultiLingualValue(values=json_obj['name']),
metadata = [FreeFormMetadata(type=METADATA_TYPE_TO_ENUM.get(m['type'], MetadataType.STRING),
label=MultiLingualValue(values=m.get('label', {})),
value=MultiLingualValue(values=m.get('value', {}))) for m in json_obj.get('metadata', [])],
time_range=RDETimeRange(json_obj['start_time'], json_obj['end_time']),
configuration=configuration,
creation_time=json_obj.get('creation_time', None),
version=json_obj.get('version', None),
sources=json_obj.get('sources', []),
has_areas=json_obj.get('has_areas', [])
)
# override to exclude specific fields
[docs]
def to_dict(self, exclude_fields = {'hrs', 'obs'}) -> dict:
"""Convert to dictionary representation.
Args:
exclude_fields: Fields to exclude (defaults to processing-only fields)
Returns:
Dictionary representation with serialized nested objects
"""
self.configuration = self.configuration.to_dict() if self.configuration else None
self.metadata = [v.to_dict() for v in self.metadata] if self.metadata else []
return super().to_dict(exclude_fields=exclude_fields)
[docs]
def instantiate_all_rde_members(self, rde_list: list[RDE]) -> None:
"""Populate the hrs and obs lists from a list of RDE entities.
Helper method for processing that adds all HRs and Observations belonging
to this dataset to the internal lists.
Args:
rde_list: List of RDE entities to filter and add
"""
for rde in rde_list:
if hasattr(rde, "dataset") and RDEType.dataset == self.id:
match rde:
case HistoricalRecord(): self.hrs.append(rde)
case Observation(): self.obs.append(rde)
@dataclass
class HistoricalRecord(RDE, UUIDEntity):
    """A single "atom" of knowledge from a historical document.

    An Historical Record represents a record of information about a place, location,
    or set of people found from a historical document. It is the source from which
    any information accessible through the Time Machine projects comes from.
    Examples: a census entry, a parcel listing row, a sentence from a research book,
    a photograph depicting an urban space.
    The granularity should be as precise as possible, with the source URL ideally
    being a IIIF annotation of the information from a document's scan.

    Attributes:
        id: Universal unique identifier of the historical record
        dataset: Reference to the dataset this HR belongs to
        time_range: Temporal range of the record's existence
        paradata: How the data was acquired (manual, semi-automatic, automatic)
        has_observations: List of observations documented in this historical source
        metadata: Dictionary of arbitrary key-value pairs storing all metadata
        rights_attribution: Optional rights and attribution information
    """

    dataset: DatasetReference
    time_range: RDETimeRange
    paradata: ParadataValues
    has_observations: list[ObsReference]
    metadata: dict = field(default_factory=dict)
    rights_attribution: Optional[str] = None

    @classmethod
    def constructor_from_json_obj(cls, json_obj: dict) -> Self:
        """Construct a HistoricalRecord from a JSON object.

        Args:
            json_obj: Dictionary containing historical record data. 'id',
                'dataset' (an object with an 'id'), 'start_time' and 'end_time'
                are required keys.

        Returns:
            HistoricalRecord instance
        """
        # NOTE: a 'type' entry of the JSON is not forwarded: the class declares no
        # such field, and passing it made the constructor raise TypeError.
        return cls(
            id=UUIDEntity.parse_uuid(json_obj['id']),
            dataset=UUIDEntity.parse_uuid(json_obj['dataset']['id']),
            time_range=RDETimeRange(json_obj['start_time'], json_obj['end_time']),
            paradata=json_obj.get('paradata', ''),
            has_observations=json_obj.get('has_observations', []),
            metadata=json_obj.get('metadata', {}),
            rights_attribution=json_obj.get('rights_attribution')
        )

    def actualize_observations_references(self, entity_list: dict[UUID, RDE]) -> None:
        """Replace observation UUID references with actual Observation objects.

        References that are not strings, or that are unknown to ``entity_list``,
        are kept as they are.

        Args:
            entity_list: Dictionary mapping UUIDs to RDE instances
        """
        self.has_observations = [
            entity_list[obs_ref] if isinstance(obs_ref, str) and obs_ref in entity_list else obs_ref
            for obs_ref in self.has_observations
        ]

    def to_dict(self, flatten_metadata: bool = False) -> dict:
        """Convert to dictionary representation.

        Args:
            flatten_metadata: If True, flatten metadata dict into top-level fields
                (a metadata key overrides a core field of the same name) and drop
                the 'metadata' entry

        Returns:
            Dictionary representation with optional flattened metadata
        """
        result = super().to_dict()
        # HR specific serialization for documents
        result['has_observations'] = [obs.get_ref() if isinstance(obs, RDE) else obs for obs in self.has_observations]
        if flatten_metadata:
            result.update(self.metadata)
            result.pop('metadata', None)
        return result

    @classmethod
    def constructor_from_dataframe_row(cls, row: pd.Series) -> Self:
        """Construct a HistoricalRecord from a pandas DataFrame row.

        Automatically extracts metadata fields from columns not used by core attributes.

        Args:
            row: pandas Series representing a row from a DataFrame. 'id',
                'dataset', 'start_time' and 'end_time' are required columns.

        Returns:
            HistoricalRecord instance
        """
        # Columns that are not metadata. 'id' is the identifier column actually
        # read below; 'uuid' and 'type' stay excluded as before ('type' is not
        # forwarded either, because the class declares no such field).
        core_columns = {'id', 'uuid', 'dataset', 'start_time', 'end_time', 'paradata', 'type', 'has_observations', 'rights_attribution'}
        # iterate the index (not a set) so the metadata keep the column order
        metadata = {column: row[column] for column in row.index if column not in core_columns}
        return cls(
            id=UUIDEntity.parse_uuid(row['id']),
            dataset=UUIDEntity.parse_uuid(row['dataset']),
            time_range=RDETimeRange(row['start_time'], row['end_time']),
            paradata=row.get('paradata', ''),
            has_observations=row.get('has_observations', []),
            metadata=metadata,
            rights_attribution=row.get('rights_attribution')
        )
@dataclass
class HeightInfo:
    """Elevation data attached to a Point of Interest.

    Two separate heights are kept, one for the ground and one for the building
    standing on it, both in meters. The values are typically taken from
    Maptiler's Database; the interface relies on them to position POIs
    correctly in 3D vision mode.

    Attributes:
        terrain: Terrain height in meters, if known
        building: Building height in meters, if known
    """

    terrain: Optional[float] = None
    building: Optional[float] = None
@dataclass
class PointOfInterest(RDE, UUIDEntity):
    """A location that one or more observations have been made of.

    A Point of Interest is the thing being observed: it ties the coordinate
    handles of observations to a place on the map. Observations from different
    datasets may point to the same POI, which then acts as an aggregate of all
    observations located on the very same coordinates.

    Attributes:
        id: Universal unique identifier of the POI
        geometry: GPS coordinates of the POI as a Shapely Point
        height: Elevation information (terrain and building height) in meters
    """

    geometry: Point
    height: HeightInfo

    @classmethod
    def constructor_from_json_obj(cls, json_obj: dict) -> Self:
        """Build a PointOfInterest out of a JSON object.

        Both a plain JSON object and a GeoJSON Feature are understood: for a
        Feature, the attributes are looked up in its 'properties'.

        Args:
            json_obj: Dictionary containing POI data

        Returns:
            PointOfInterest instance (``geometry`` is None when the JSON has none)
        """
        raw_geometry = json_obj.get('geometry')
        # a GeoJSON Feature keeps its attributes under 'properties'
        properties = json_obj.get('properties', json_obj)
        poi_id = UUIDEntity.parse_uuid(properties['id'])
        if raw_geometry:
            point = shapely.from_geojson(json.dumps(raw_geometry))
        else:
            point = None
        elevation = HeightInfo(
            terrain=properties.get('terrain_height'),
            building=properties.get('building_height')
        )
        return cls(id=poi_id, geometry=point, height=elevation)
@dataclass
class Observation(RDE, UUIDEntity):
    """Space-time representation of a piece of information found in a historical source.

    Every Observation sits on one point of physical space, given by a single
    latitude and longitude. It may stand for a physical location (e.g., cadastral
    parcel) or for an event (e.g., apprenticeship). It is the pivot entity that
    links Historical Records, Points of Interest, and Geometries together.
    One HR can carry several Observations (e.g., a postcard showing multiple
    identified landmarks, where each landmark gets a dedicated Observation).

    Attributes:
        id: Universal unique identifier of the observation
        historical_record: Reference to the HR that attests to this observation's existence
        geometry: GPS coordinates of the observation as a Shapely Point
        has_geometries: List of geometry entities tied to this observation
        part_of_point_of_interest: Optional reference to the associated POI
    """

    historical_record: HRReference
    geometry: Point
    has_geometries: list[GeometryReference] = field(default_factory=list)
    part_of_point_of_interest: Optional[POIReference] = None

    def actualize_references(self, entity_list: dict[UUID, RDE]) -> None:
        """Swap UUID references for the RDE objects they designate.

        A reference is only replaced when it is a string known to
        ``entity_list``; anything else is left as it is.

        Args:
            entity_list: Dictionary mapping UUIDs to RDE instances
        """
        def resolve(reference):
            if isinstance(reference, str) and reference in entity_list:
                return entity_list[reference]
            return reference

        self.has_geometries = [resolve(geometry_ref) for geometry_ref in self.has_geometries]
        self.part_of_point_of_interest = resolve(self.part_of_point_of_interest)
        self.historical_record = resolve(self.historical_record)

    @classmethod
    def constructor_from_json_obj(cls, json_obj: dict) -> Self:
        """Build an Observation out of a JSON object.

        Both a plain JSON object and a GeoJSON Feature are understood: for a
        Feature, the attributes are looked up in its 'properties'.

        Args:
            json_obj: Dictionary containing observation data

        Returns:
            Observation instance
        """
        raw_geometry = json_obj.get('geometry')
        # a GeoJSON Feature keeps its attributes under 'properties'
        properties = json_obj.get('properties', json_obj)
        observation_id = UUIDEntity.parse_uuid(properties['id'])
        # only the first entry of a non-empty 'documented_in' list is kept
        documented_in = properties.get('documented_in')
        if isinstance(documented_in, list) and documented_in:
            record_reference = documented_in[0]
        else:
            record_reference = None
        return cls(
            id=observation_id,
            historical_record=record_reference,
            geometry=shapely.from_geojson(json.dumps(raw_geometry)),
            has_geometries=properties.get('has_geometries', []),
            part_of_point_of_interest=properties.get('part_of_point_of_interest', None)
        )
@dataclass
class GeographicalExtent:
    """Bounding box representing the geographical extent of a map or layer.

    Stores the boundary coordinates in [min_x, min_y, max_x, max_y] format,
    representing the spatial limits of a geographical entity.

    Attributes:
        coordinates: List of four float values [min_x, min_y, max_x, max_y]

    Raises:
        AssertionError: If coordinates don't meet format requirements
    """

    coordinates: list[float]

    def __post_init__(self):
        # The checks raise AssertionError explicitly instead of using `assert`,
        # so they are still enforced when Python runs with -O.
        if len(self.coordinates) != 4:
            raise AssertionError("Extent must have four coordinates: [min_x, min_y, max_x, max_y]")
        min_x, min_y, max_x, max_y = self.coordinates
        if not min_x < max_x:
            raise AssertionError("min_x must be less than max_x")
        if not min_y < max_y:
            raise AssertionError("min_y must be less than max_y")
        # Note that the two ordering checks above would fail on maps that lie exactly
        # on the limits of the negative latitude (-0.0) or longitude (-0.0). It is
        # unlikely that we ingest maps from such zones (and most GIS software
        # specifically avoids them: https://en.wikipedia.org/wiki/180th_meridian).
@dataclass
class Map(RDE, UUIDEntity):
    """A group of geographical layers stemming from a single historical map.

    Represents a historical map from which users can freely select layers to display
    in the interface. Each map contains one or more layers (raster or vector) that
    can be toggled on/off.

    Attributes:
        id: Universal unique identifier of the map
        name: Multilingual short title displayed as header in the frontend
        slug: Short string identifying the map (human-readable)
        time_range: Temporal range of the map's existence
        layers: List of layer entities derived from this map
        metadata: List of free-form metadata fields for contextual information
        thumbnail: IIIF protocol URL linking to an image thumbnail
        version: Version label formatted as "X.Y.Z" (major.minor.patch)
        areas: References to areas the map is related to (must have at least one)
        extent: Optional bounding box boundary of the map
    """

    name: MultiLingualValue
    slug: str
    time_range: RDETimeRange
    layers: list[LayerReference] = field(default_factory=list)
    metadata: list[FreeFormMetadata] = field(default_factory=list)
    thumbnail: Optional[str] = None
    version: Optional[str] = None
    areas: list[AreaReference] = field(default_factory=list)
    # Declared last so existing positional constructor calls keep working. The JSON
    # constructor has always passed `extent=`, which failed without this field.
    extent: Optional[GeographicalExtent] = None

    @classmethod
    def constructor_from_json_obj(cls, json_obj: dict) -> Self:
        """Construct a Map from a JSON object.

        Args:
            json_obj: Dictionary containing map data. 'id', 'name', 'slug',
                'start_time' and 'end_time' are required keys.

        Returns:
            Map instance

        Raises:
            AssertionError: If an 'extent' is given but is not a valid bounding box
        """
        metadata = [
            FreeFormMetadata(
                # unknown metadata types are treated as plain strings
                type=METADATA_TYPE_TO_ENUM.get(m['type'], MetadataType.STRING),
                label=MultiLingualValue(values=m.get('label', {})),
                value=MultiLingualValue(values=m.get('value', {})),
            )
            for m in json_obj.get('metadata', [])
        ]
        # only build an extent when the JSON provides one: an absent (or null)
        # extent is not an error
        extent_data = json_obj.get('extent')
        return cls(
            id=UUIDEntity.parse_uuid(json_obj['id']),
            name=MultiLingualValue(values=json_obj['name']),
            slug=json_obj['slug'],
            time_range=RDETimeRange(json_obj['start_time'], json_obj['end_time']),
            layers=json_obj.get('layers', []),
            metadata=metadata,
            thumbnail=json_obj.get('thumbnail'),
            extent=GeographicalExtent(extent_data) if extent_data is not None else None,
            version=json_obj.get('version'),
            areas=json_obj.get('areas', [])
        )

    def to_dict(self, exclude_fields=frozenset()) -> dict:
        """Convert to dictionary representation.

        The instance is left untouched, so the method can be called repeatedly.

        Args:
            exclude_fields: Collection of field names to exclude

        Returns:
            Dictionary representation with serialized nested objects: layers as
            references, metadata as dictionaries, extent as a coordinate list
        """
        excluded = set(exclude_fields)
        # `metadata` is serialized here instead of by the generic list handling of
        # the base class, hence it is withheld from the super call.
        result = super().to_dict(exclude_fields=excluded | {'metadata'})
        if 'layers' not in excluded:
            # a layer may be a Layer object, a dict carrying an 'id', or already a reference
            result['layers'] = [
                layer.get_ref() if isinstance(layer, Layer)
                else layer['id'] if isinstance(layer, dict) and 'id' in layer
                else layer
                for layer in self.layers
            ]
        if 'extent' not in excluded:
            result['extent'] = self.extent.coordinates if isinstance(self.extent, GeographicalExtent) else self.extent
        if 'metadata' not in excluded:
            result['metadata'] = [
                entry.to_dict() if hasattr(entry, 'to_dict') else entry
                for entry in (self.metadata or [])
            ]
        return result
@dataclass
class LayerConfigurationService:
    """Where and how the tiles of a layer can be fetched.

    Pairs the address of a tile service on a geoserver with the kind of
    service found at that address.

    Attributes:
        url: URL on the geoserver that serves the tiles for the layer
        type: Short string naming the tile type (MVT, MVTS, XYZ, etc.)
    """

    url: str
    type: str
@dataclass
class LayerConfiguration(RDE, UUIDEntity):
    """Configuration describing how a layer is served and accessed.

    Defines the technical details for how the frontend can access and display
    a layer, including service endpoints, zoom levels, and spatial extent.

    Attributes:
        id: Universal unique identifier of the layer configuration
        service: Service configuration (URL and type) for accessing tiles
        min_zoom_level: Minimum zoom level available for display
        max_zoom_level: Maximum zoom level available for display
        extent: Optional bounding box boundary of the layer
    """

    service: LayerConfigurationService
    min_zoom_level: int
    max_zoom_level: int
    extent: Optional[GeographicalExtent] = None

    @classmethod
    def constructor_from_json_obj(cls, json_obj: dict) -> Self:
        """Construct a LayerConfiguration from a JSON object.

        Missing service details default to empty strings, missing zoom levels
        to 0 (minimum) and 22 (maximum).

        Args:
            json_obj: Dictionary containing layer configuration data ('id' is required)

        Returns:
            LayerConfiguration instance

        Raises:
            AssertionError: If an 'extent' is given but is not a valid bounding box
        """
        service_data = json_obj.get('service', {})
        service = LayerConfigurationService(
            url=service_data.get('url', ''),
            type=service_data.get('type', '')
        )
        # an absent extent and an explicit null both mean "no extent"
        extent_data = json_obj.get('extent')
        return cls(
            id=UUIDEntity.parse_uuid(json_obj['id']),
            service=service,
            min_zoom_level=json_obj.get('min_zoom_level', 0),
            max_zoom_level=json_obj.get('max_zoom_level', 22),
            extent=GeographicalExtent(extent_data) if extent_data is not None else None
        )

    def to_dict(self, exclude_fields=frozenset()):
        """Convert to dictionary representation.

        The instance is left untouched, so the method can be called repeatedly.

        Args:
            exclude_fields: Collection of field names to exclude

        Returns:
            Dictionary representation with the service as a {'url', 'type'}
            dictionary and the extent as a coordinate list (or None)
        """
        result = super().to_dict(exclude_fields)
        # The base class copies these two objects as they are; replace them in the
        # result (not on self) with their plain-data form.
        if 'service' not in exclude_fields:
            service = self.service
            if isinstance(service, LayerConfigurationService):
                service = {'url': service.url, 'type': service.type}
            result['service'] = service
        if 'extent' not in exclude_fields:
            result['extent'] = self.extent.coordinates if isinstance(self.extent, GeographicalExtent) else self.extent
        return result
@dataclass
class Layer(RDE, UUIDEntity):
    """A synthetic derivation from a map (raster or vector).

    Represents an abstraction of objects that users can manipulate to display as a
    2D planar field in the interface. Can be either a vectorization of specific
    content from a map or the actual digital facsimile of the map.

    Attributes:
        id: Universal unique identifier of the layer
        slug: Short string identifying the layer
        name: Multilingual short title displayed in the frontend
        description: Brief multilingual description displayed in overlay choices
        time_range: Temporal range of the layer's existence
        map: Reference to the map this layer is part of
        type: Layer type (RASTER: image tiles, VECTOR: geometry entities)
        layer_configurations: List of configurations for accessing this layer
    """

    slug: str
    name: MultiLingualValue
    description: MultiLingualValue
    time_range: RDETimeRange
    map: MapReference
    type: LayerType
    layer_configurations: list[LayerConfiguration] = field(default_factory=list)

    @classmethod
    def constructor_from_json_obj(cls, json_obj: dict) -> Self:
        """Construct a Layer from a JSON object.

        Args:
            json_obj: Dictionary containing layer data. 'id', 'slug', 'name',
                'start_time' and 'end_time' are required keys.

        Returns:
            Layer instance. ``map`` is None unless the JSON holds a 'map' object;
            an unknown or missing 'type' falls back to LayerType.RASTER.
        """
        map_data = json_obj.get('map')
        # `or ''` also covers a 'type' that is present but null
        type_label = (json_obj.get('type') or '').upper()
        return cls(
            id=UUIDEntity.parse_uuid(json_obj['id']),
            slug=json_obj['slug'],
            name=MultiLingualValue(values=json_obj['name']),
            description=MultiLingualValue(values=json_obj.get('description', {})),
            time_range=RDETimeRange(json_obj['start_time'], json_obj['end_time']),
            map=UUIDEntity.parse_uuid(map_data['id']) if isinstance(map_data, dict) else None,
            type=LAYER_TYPE_TO_ENUM.get(type_label, LayerType.RASTER),
            layer_configurations=[LayerConfiguration.constructor_from_json_obj(lc) for lc in json_obj.get('layer_configurations', [])]
        )

    def to_dict(self, exclude_fields=frozenset()):
        """Convert to dictionary representation.

        The instance is left untouched, so the method can be called repeatedly.

        Args:
            exclude_fields: Collection of field names to exclude

        Returns:
            Dictionary representation with serialized layer configurations
        """
        result = super().to_dict(exclude_fields=exclude_fields)
        if 'layer_configurations' not in exclude_fields:
            # The base class turns the configurations into references; a layer
            # embeds them in full instead.
            result['layer_configurations'] = [
                configuration.to_dict() if hasattr(configuration, 'to_dict') else configuration
                for configuration in (self.layer_configurations or [])
            ]
        return result
@dataclass
class Geometry(RDE, UUIDEntity):
    """Mathematical representation of a physical location as GPS coordinates.

    Represents geographical areas tied to an Observation and Historical Record.
    Can represent parcels, buildings, streets, courtyards, parishes, or any arbitrary
    zone. Can also exist without being referenced by a record, existing only as part
    of a vector Layer.

    Attributes:
        id: Universal unique identifier of the geometry
        geometry: Shapely geometry object (Point, LineString, Polygon, Multi*);
            a GeoJSON geometry dictionary is converted on construction
        part_of_layer: Optional reference to the layer this geometry belongs to

    Raises:
        ValueError: If the id is not a valid identifier, or if the geometry is
            not valid according to Shapely validation
    """

    geometry: GeometryType
    part_of_layer: Optional[LayerReference] = None

    def __post_init__(self):
        # Defining __post_init__ here hides the one of UUIDEntity; call it
        # explicitly so the id is validated/generated like for every other entity.
        super().__post_init__()
        if isinstance(self.geometry, dict):
            self.geometry = shapely.from_geojson(json.dumps(self.geometry))
        if not self.geometry.is_valid:
            # shapely.is_valid_reason is available from the top-level package,
            # unlike shapely.validation, which this module never imports.
            raise ValueError(f'Invalid geometry, because {shapely.is_valid_reason(self.geometry)}')

    @classmethod
    def constructor_from_json_obj(cls, json_obj: dict) -> Self:
        """Construct a Geometry from a JSON object.

        Handles GeoJSON Feature format where properties and geometry are separate.
        The id is taken from the properties, or from the Feature itself when the
        properties have none.

        Args:
            json_obj: Dictionary containing geometry data (typically GeoJSON Feature)

        Returns:
            Geometry instance
        """
        # to note, the JSON object is a GeoJSON Feature object
        props = json_obj.get('properties', {})
        geometry = json_obj.get('geometry', {})
        return cls(
            id=UUIDEntity.parse_uuid(props.get('id', json_obj.get('id'))),
            part_of_layer=UUIDEntity.parse_uuid(props.get('part_of_layer')) if 'part_of_layer' in props else None,
            geometry=shapely.from_geojson(json.dumps(geometry)),
        )

    @classmethod
    def constructor_from_raw_geojson_line(cls, geojson_line: str, uuid: str, layer_uuid: str) -> Self:
        """Construct a Geometry from a raw GeoJSON line string.

        Used for processing GeoJSON line-delimited files.

        Args:
            geojson_line: String containing a single GeoJSON object
            uuid: UUID to assign to this geometry (the name hides the ``uuid``
                module inside this method)
            layer_uuid: UUID of the layer this geometry belongs to

        Returns:
            Geometry instance
        """
        json_obj = json.loads(geojson_line)
        return cls(
            id=UUIDEntity.parse_uuid(uuid),
            # the field is called `part_of_layer`; `has_layer` raised TypeError
            part_of_layer=UUIDEntity.parse_uuid(layer_uuid),
            geometry=shapely.from_geojson(json.dumps(json_obj.get('geometry', {}))),
        )

    def to_dict(self) -> dict:
        """Convert to dictionary representation.

        Returns:
            Dictionary representation with geometry as GeoJSON dict
        """
        result = super().to_dict()
        result['geometry'] = json.loads(shapely.to_geojson(self.geometry))
        return result
@dataclass
class Area(RDE, UUIDEntity):
    """The boundary of a specific geographical entity.

    Represents geographical boundaries of continents, countries, cities, or ad-hoc
    administrative zones. Used to index maps and datasets to curated areas in the
    Time Atlas, enabling spatial filtering and organization.

    Attributes:
        id: Universal unique identifier of the area
        slug: Short string identifying the area (human-readable)
        name: Multilingual name of the geographical entity
        geometry: Shapely geometry object representing the boundary (typically Polygon)
    """

    slug: str
    name: MultiLingualValue
    geometry: GeometryType

    @classmethod
    def constructor_from_json_obj(cls, json_obj: dict) -> Self:
        """Construct an Area from a JSON object.

        Args:
            json_obj: Dictionary containing area data. 'id', 'slug' and 'name'
                are required keys.

        Returns:
            Area instance
        """
        return cls(
            id=UUIDEntity.parse_uuid(json_obj['id']),
            # `slug` is a required field; leaving it out made this constructor
            # raise TypeError on every call
            slug=json_obj['slug'],
            name=MultiLingualValue(values=json_obj['name']),
            geometry=shapely.from_geojson(json.dumps(json_obj.get('geometry', {})))
        )