Source code for timeatlas.RDEModel

from dataclasses import dataclass, field
import uuid
from shapely.geometry import Point, LineString, Polygon, MultiLineString, MultiPolygon
import shapely
import json
from typing import Optional, Self
from datetime import datetime
import pandas as pd
from .TAEnums import *

# Type aliases for improved code readability and type hints.
#
# These use the `type` statement (PEP 695), whose right-hand side is evaluated
# lazily. That is what allows the aliases below to name classes such as
# `Observation` or `Map` that are only defined further down in this module.
# NOTE(review): `Geometry` and `Area` are not defined in the part of the module
# visible here -- TODO confirm they are provided elsewhere (e.g. later in the
# file or via the star import), otherwise resolving those aliases will fail.
type GeometryType = Point | LineString | Polygon | MultiLineString | MultiPolygon
"""Union type for all supported Shapely geometry types in the Time Atlas data model."""

type UUID = str
"""String representation of a Universal Unique Identifier (UUID)."""

type ObsReference = Observation | UUID
"""Reference to an Observation entity, either as an object or UUID string."""

type HRReference = HistoricalRecord | UUID
"""Reference to a HistoricalRecord entity, either as an object or UUID string."""

type DatasetReference = Dataset | UUID
"""Reference to a Dataset entity, either as an object or UUID string."""

type POIReference = PointOfInterest | UUID
"""Reference to a PointOfInterest entity, either as an object or UUID string."""

type GeometryReference = Geometry | UUID
"""Reference to a Geometry entity, either as an object or UUID string."""

type AreaReference = Area | UUID
"""Reference to an Area entity, either as an object or UUID string."""

type LayerReference = Layer | UUID
"""Reference to a Layer entity, either as an object or UUID string."""

type MapReference = Map | UUID
"""Reference to a Map entity, either as an object or UUID string."""


class UUIDManager():

    """Utility class for managing UUID generation and parsing in the Time Atlas data model.
    
    Provides methods for generating deterministic UUIDs based on dataset namespaces
    and parsing UUIDs from various formats (e.g., URLs). Ensures consistent handling
    of unique identifiers across all RDE entities.
    """

    namespace: UUID

    def __init__(self, namespace):
        match namespace:
            case str():
                if namespace.startswith('http'):
                    self.namespace = uuid.uuid5(uuid.NAMESPACE_URL, namespace)
                else:
                    raise ValueError(f"Invalid namespace string: {namespace}. Must be a URL starting with 'http'.")
            case uuid.UUID():
                self.namespace = namespace
            case _:
                raise ValueError(f"Invalid namespace type: {type(namespace)}. Must be a string URL or a uuid.UUID object.")

    @staticmethod
    def generate_uuid(namespace: Optional[uuid.UUID | str], value: Optional[str] = None) -> UUID:
        if namespace:
            manager = UUIDManager(namespace)
            return manager._generate_uuid(value)
        else:
            return str(uuid.uuid4())

    @staticmethod
    def is_valid_uuid(uuid_string: str) -> bool:
        '''
        Checks whether the string given as argument is a valid UUID format.
        Returns True if the string is a valid UUID (e.g., "80d80427-b711-5324-b1d1-4eeddb059269"), False otherwise.
        '''
        try:
            uuid.UUID(uuid_string)
            return True
        except (ValueError, AttributeError, TypeError):
            return False
        

    def _generate_uuid(self, value: Optional[str] = None) -> UUID:
        """Generate a deterministic UUID based on the namespace and value.
        
        Uses UUIDv5 algorithm to create a unique identifier that is consistent
        across different runs given the same namespace and value.
        
        Args:
            value: Optional string value to generate the UUID from (e.g., a specific identifier from the data) 

        Returns:
            A UUID string generated from the namespace and value
        """        
        
        if value is None:
            value = str(uuid.uuid4())  # fallback to random UUID if no value provided
        return str(uuid.uuid5(self.namespace, value))

[docs] @dataclass class UUIDEntity: """Base class for entities that have a unique identifier. All Research Data Entities (RDE) in the Time Atlas data model have a unique UUID generated from a UUIDv5 algorithm with a custom dataset-based namespace and a deterministic identifier from the data. Attributes: id: Universal unique identifier of the resource when provided to the constructor, it can be either a valid UUID string, a uuid.UUID object, or a tuple of (UUIDManager, value) to generate a deterministic UUID based on the manager's namespace and the provided value. If no ID is provided, a random UUID will be generated. """ id: Optional[UUID | str | tuple[UUIDManager, str]] def __post_init__(self): match self.id: case str(): if not UUIDManager.is_valid_uuid(self.id): raise ValueError(f"Invalid UUID string: \"{self.id}\". Must be a valid UUID format (e.g., \"80d80427-b711-5324-b1d1-4eeddb059269\").") case uuid.UUID(): self.id = str(self.id) case (UUIDManager(), str() as value): # this ensure that there is no collision of UUIDs across different RDE types, as the value is prefixed by the class name of the entity, and the namespace is the same for all entities of a dataset, so there will be no collision between different datasets either, as they have different namespaces. current_class_name = self.__class__.__name__.lower() value = f"{current_class_name}_{value}" self.id = self.id[0]._generate_uuid(value) case None: self.id = UUIDManager.generate_uuid(None) # generate random UUID if no ID provided case _: raise ValueError(f"Invalid ID type: {type(self.id)}. Must be a valid UUID string, a uuid.UUID object, or None.")
[docs] def get_ref(self) -> str: """Return the UUID reference of this entity. Returns: The UUID string of this entity """ return self.id
[docs] @classmethod def parse_uuid(cls, data_id: str) -> None: """Parse a UUID from various formats. In the API, unique IDs are often represented as URLs, but in the data model we keep only the UUID part. This method extracts the UUID from URL format. Args: data_id: The ID to parse, either as a UUID string or URL containing UUID Returns: The parsed UUID string """ # in the API, the unique id is often represented as a URL, but in the data model we want to keep only the UUID part, so we parse it here. if data_id.startswith('http'): return data_id.split('/')[-1] return data_id
[docs] @dataclass class RDE: """Base class for all Research Data Entities (RDE) in the Time Atlas data model. Provides common serialization and type handling methods for all RDE types. All RDE instances are related using numerical identifiers (UUIDs), which are transformed into URLs when ingested in the backend. """
[docs] def to_dict(self, exclude_fields = {}) -> dict: """Convert the RDE instance to a dictionary representation. Handles proper serialization of nested RDE objects, enums, and special types like MultiLingualValue and RDETimeRange. Automatically adds the rde_type field for main RDE entities. Args: exclude_fields: Set of field names to exclude from the output dictionary Returns: Dictionary representation of the RDE instance """ result = {} for field_name, field_value in self.__dict__.items(): if field_name in exclude_fields: continue match field_value: case UUIDEntity(): result[field_name] = field_value.get_ref() case RDE(): # works for dataset configuration as well, as it inherits from RDE, but does not have rde_type field, so it will not be added in the final dict result[field_name] = field_value.to_dict() case RDETimeRange(): result['start_time'] = field_value.start_time result['end_time'] = field_value.end_time case HeightInfo(): result['terrain_height'] = field_value.terrain result['building_height'] = field_value.building case list(): result[field_name] = [item.get_ref() if isinstance(item, RDE) else item for item in field_value] case Enum(): result[field_name] = field_value.value case MultiLingualValue(): result[field_name] = field_value.values case _: result[field_name] = field_value # dataset configuration and other special cases have no rde_type field, only doing it for main RDE types rde_name = self.__class__.__name__.lower() if rde_name in CLASS_NAME_TO_RDE: result['rde_type'] = CLASS_NAME_TO_RDE[rde_name].value return result
[docs] def get_type(self) -> Optional[RDEType]: """Get the RDE type enum value for this entity. Returns: The RDEType enum value or None if not a main RDE type """ rde_name = self.__class__.__name__.lower() return CLASS_NAME_TO_RDE.get(rde_name).value
[docs] @classmethod def constructor_from_json_obj(cls, json_obj: dict) -> Self: """Construct an RDE instance from a JSON object. This method should be implemented in subclasses to handle specific deserialization logic for each RDE type. Args: json_obj: Dictionary containing the RDE data Returns: An instance of the RDE subclass Raises: NotImplementedError: If not implemented in subclass """ raise NotImplementedError('This method should be implemented in subclasses')
@dataclass
class RDETimeRange:
    """Represents a temporal range for RDE entities.

    Datetime values formatted as ISO 8601 strings representing the range of
    time existence for an RDE, denoting the starting and ending points. Used by
    Historical Records, Datasets, Maps, and Layers.

    Attributes:
        start_time: ISO 8601 formatted datetime string for the start of existence
        end_time: ISO 8601 formatted datetime string for the end of existence

    Raises:
        ValueError: If datetime format is invalid
        AssertionError: If start_time is greater than end_time
    """

    start_time: str
    end_time: str

    def __post_init__(self):
        """Validate both timestamps and their chronological order."""
        # Parsing doubles as format validation (ValueError on malformed input).
        start = datetime.fromisoformat(self.start_time.replace("Z", "+00:00"))
        end = datetime.fromisoformat(self.end_time.replace("Z", "+00:00"))

        if (start.tzinfo is None) == (end.tzinfo is None):
            # Compare real instants: plain string comparison mis-orders values
            # written with different UTC offsets (or "Z" vs "+00:00").
            in_order = start <= end
        else:
            # Naive and aware datetimes cannot be compared; keep the historical
            # lexicographic check for that mixed case.
            in_order = self.start_time <= self.end_time

        # Raised explicitly (instead of `assert`) so the check survives `python -O`.
        if not in_order:
            raise AssertionError("start_time must be less than or equal to end_time")
@dataclass
class MultiLingualValue:
    """Container for multilingual text values following IIIF format.

    Stores text that can be expressed in multiple languages, similar to the
    IIIF format for multilingual descriptions. Each language can have multiple
    values.

    Attributes:
        values: Dictionary mapping language codes (2-3 letters) to lists of
            text values. Defaults to an empty dictionary so the class can be
            used directly as a ``default_factory``.
            Example: {"en": ["English text"], "fr": ["Texte français"]}
    """

    values: dict[str, list[str]] = field(default_factory=dict)
@dataclass
class MetadataFieldConfig(RDE):
    """Configuration for a metadata field in a Historical Record.

    Describes how the frontend and/or backend should manipulate values recorded
    in the HR's metadata property. Each field configuration specifies display
    properties, data type, indexing behavior, and semantic tags.

    Attributes:
        id: The metadata field name as it appears in the HR metadata dictionary
        type: The data type of the metadata field (STRING, INTEGER, FLOAT, LIST, URL)
        display_label: Multilingual label to display next to the value in the frontend
        nullable: Whether the field may hold empty values
        indexable: Whether the field is indexed in the search engine for full-text search
        short_display: Whether this field should be displayed by default in card view
        hidden: Whether the field should not be displayed to normal users
        tag: Broad category for this field (PEOPLE, PLACE, LAND_USE) for facet search
        paradata: How the data was acquired/produced for this specific field
    """

    id: str
    type: Optional[MetadataType] = None
    # MultiLingualValue is built with an explicit empty mapping so the default
    # works even though `values` is a constructor argument of that class.
    display_label: MultiLingualValue = field(default_factory=lambda: MultiLingualValue(values={}))
    nullable: bool = field(default=True)
    indexable: bool = field(default=False)
    short_display: bool = field(default=False)
    hidden: bool = field(default=False)
    tag: Optional[MetadataTag] = None
    paradata: Optional[ParadataValues] = None

    @classmethod
    def constructor_from_json_obj(cls, json_obj: dict) -> Self:
        """Construct a MetadataFieldConfig from a JSON object.

        Only ``id`` is mandatory. A missing or unknown ``type`` falls back to
        ``MetadataType.STRING``; a missing or unknown ``tag`` / ``paradata``
        becomes ``None``.

        Args:
            json_obj: Dictionary containing metadata field configuration data

        Returns:
            MetadataFieldConfig instance

        Raises:
            KeyError: If ``id`` is missing from ``json_obj``
        """
        return cls(
            id=json_obj['id'],
            type=METADATA_TYPE_TO_ENUM.get(json_obj.get('type'), MetadataType.STRING),
            display_label=MultiLingualValue(values=json_obj.get('display_label', {})),
            nullable=json_obj.get('nullable', True),
            indexable=json_obj.get('indexable', False),
            short_display=json_obj.get('short_display', False),
            hidden=json_obj.get('hidden', False),
            # .get() on the payload: these keys are optional and used to raise KeyError.
            tag=METADATA_TAG_TO_ENUM.get(json_obj.get('tag'), None),
            paradata=PARADATA_VALUE_TO_ENUM.get(json_obj.get('paradata'), None),
        )
@dataclass
class FreeFormMetadata(RDE):
    """Free-form metadata field for Datasets and Maps.

    Holds arbitrary contextual information that relates to a Dataset or Map.
    Both the label and value are specified as multilingual entities since they
    are arbitrary and need localization support.

    Attributes:
        type: The metadata type (STRING, INTEGER, FLOAT, LIST, URL)
        label: Multilingual label for the metadata field
        value: Multilingual value for the metadata field
    """

    type: MetadataType
    label: MultiLingualValue
    value: MultiLingualValue

    def to_dict(self, exclude_fields=frozenset()) -> dict:
        """Convert to dictionary representation.

        Args:
            exclude_fields: Collection of keys to leave out. Accepted so the
                signature stays compatible with ``RDE.to_dict``; defaults to
                excluding nothing.

        Returns:
            Dictionary with type as string value and multilingual label/value
        """
        serialized = {
            'type': self.type.value,
            'label': self.label.values,
            'value': self.value.values,
        }
        return {key: val for key, val in serialized.items() if key not in exclude_fields}
@dataclass
class DatasetConfiguration(RDE):
    """Operational configuration for a Dataset.

    Describes how the Historical Records in a dataset should be handled and
    served through the information system. Includes metadata field
    configurations and display settings for the frontend.

    Attributes:
        metadata_field_config: List of configurations for each metadata field in HRs
        main_label: Formatting string indicating which metadata to use for main label
        sub_label: Formatting string for potential sub-label display
        display_thumbnail: Whether HRs have thumbnails that should be displayed
        external_source: Whether source button should forward to external URL
    """

    metadata_field_config: list[MetadataFieldConfig] = field(default_factory=list)
    main_label: str = ''
    sub_label: str = ''
    display_thumbnail: bool = False
    external_source: bool = False

    @classmethod
    def constructor_from_json_obj(cls, json_obj: dict) -> Self:
        """Construct a DatasetConfiguration from a JSON object.

        Display settings are read from the nested ``dataset_config`` object when
        it is present. Otherwise they are read from the top level of
        ``json_obj``, which is the layout produced by ``to_dict``.

        Args:
            json_obj: Dictionary containing dataset configuration data

        Returns:
            DatasetConfiguration instance
        """
        display_settings = json_obj.get('dataset_config') or json_obj
        return cls(
            metadata_field_config=[
                MetadataFieldConfig.constructor_from_json_obj(entry)
                for entry in json_obj.get('metadata_field_config', [])
            ],
            main_label=display_settings.get('main_label', ''),
            sub_label=display_settings.get('sub_label', ''),
            display_thumbnail=display_settings.get('display_thumbnail', False),
            external_source=display_settings.get('external_source', False),
        )

    def to_dict(self, exclude_fields=frozenset()):
        """Convert to dictionary representation.

        The instance is left untouched, so the method can be called repeatedly.

        Args:
            exclude_fields: Collection of field names to exclude

        Returns:
            Dictionary representation with serialized metadata field configs
        """
        # The list is serialized here and hidden from the generic serializer,
        # which would try to replace each RDE item by a UUID reference.
        remaining = super().to_dict(exclude_fields=set(exclude_fields) | {'metadata_field_config'})
        if 'metadata_field_config' in exclude_fields:
            return remaining
        field_configs = [
            config.to_dict() if isinstance(config, RDE) else config
            for config in (self.metadata_field_config or [])
        ]
        return {'metadata_field_config': field_configs, **remaining}
[docs] @dataclass class Dataset(RDE, UUIDEntity): """A homogeneous collection of information ingested in the Time Machine system. Represents the link between research data and its numerical expression and exploitation. Allows users to access meta/paradata on the dataset level. The entity is tied to operational configuration describing how the entities forming the dataset should be handled and served. Attributes: id: Universal unique identifier of the dataset slug: Ad-hoc label identifying the dataset (human-readable) name: Multilingual short title displayed as header in the frontend time_range: Temporal range of the dataset's existence creation_time: Timestamp indicating when this version was created version: Version label formatted as "X.Y.Z" (major.minor.patch) sources: List of IIIF manifest UUIDs used to produce the dataset has_areas: References to areas the dataset is related to configuration: Metadata configuration for HRs from this dataset metadata: List of free-form metadata fields for contextual information hrs: List of Historical Records in this dataset (processing only) obs: List of Observations in this dataset (processing only) """ slug: str name: MultiLingualValue time_range: RDETimeRange creation_time: Optional[str] = None version: Optional[str] = None sources: list[str] = field(default_factory=list) has_areas: Optional[list[AreaReference]] = field(default_factory=list) configuration: DatasetConfiguration = field(default_factory=DatasetConfiguration) metadata: list[FreeFormMetadata] = field(default_factory=list) # fields that do not exist in the RDE data model, only there to make python processing easier: hrs: list['HistoricalRecord'] = field(default_factory=list) obs: list['Observation'] = field(default_factory=list)
[docs] @classmethod def constructor_from_json_obj(cls, json_obj: dict) -> Self: """Construct a Dataset from a JSON object. Args: json_obj: Dictionary containing dataset data Returns: Dataset instance """ config_data = json_obj.get('configuration') configuration = DatasetConfiguration.constructor_from_json_obj(config_data) if config_data else None return cls( id=UUIDEntity.parse_uuid(json_obj['id']), slug=json_obj['slug'], name=MultiLingualValue(values=json_obj['name']), metadata = [FreeFormMetadata(type=METADATA_TYPE_TO_ENUM.get(m['type'], MetadataType.STRING), label=MultiLingualValue(values=m.get('label', {})), value=MultiLingualValue(values=m.get('value', {}))) for m in json_obj.get('metadata', [])], time_range=RDETimeRange(json_obj['start_time'], json_obj['end_time']), configuration=configuration, creation_time=json_obj.get('creation_time', None), version=json_obj.get('version', None), sources=json_obj.get('sources', []), has_areas=json_obj.get('has_areas', []) )
# override to exclude specific fields
[docs] def to_dict(self, exclude_fields = {'hrs', 'obs'}) -> dict: """Convert to dictionary representation. Args: exclude_fields: Fields to exclude (defaults to processing-only fields) Returns: Dictionary representation with serialized nested objects """ self.configuration = self.configuration.to_dict() if self.configuration else None self.metadata = [v.to_dict() for v in self.metadata] if self.metadata else [] return super().to_dict(exclude_fields=exclude_fields)
[docs] def instantiate_all_rde_members(self, rde_list: list[RDE]) -> None: """Populate the hrs and obs lists from a list of RDE entities. Helper method for processing that adds all HRs and Observations belonging to this dataset to the internal lists. Args: rde_list: List of RDE entities to filter and add """ for rde in rde_list: if hasattr(rde, "dataset") and RDEType.dataset == self.id: match rde: case HistoricalRecord(): self.hrs.append(rde) case Observation(): self.obs.append(rde)
@dataclass
class HistoricalRecord(RDE, UUIDEntity):
    """A single "atom" of knowledge from a historical document.

    An Historical Record represents a record of information about a place,
    location, or set of people found from a historical document. It is the
    source from which any information accessible through the Time Machine
    projects comes from.

    Examples: a census entry, a parcel listing row, a sentence from a research
    book, a photograph depicting an urban space. The granularity should be as
    precise as possible, with the source URL ideally being a IIIF annotation of
    the information from a document's scan.

    Attributes:
        id: Universal unique identifier of the historical record
        dataset: Reference to the dataset this HR belongs to
        time_range: Temporal range of the record's existence
        paradata: How the data was acquired (manual, semi-automatic, automatic)
        has_observations: List of observations documented in this historical source
        metadata: Dictionary of arbitrary key-value pairs storing all metadata
        rights_attribution: Optional rights and attribution information
    """

    dataset: DatasetReference
    time_range: RDETimeRange
    paradata: ParadataValues
    has_observations: list[ObsReference]
    metadata: dict = field(default_factory=dict)
    rights_attribution: Optional[str] = None

    @classmethod
    def constructor_from_json_obj(cls, json_obj: dict) -> Self:
        """Construct a HistoricalRecord from a JSON object.

        The ``dataset`` entry may be either an object holding an ``id`` or the
        identifier itself. A ``type`` entry, if any, is ignored because the
        model has no such field.

        Args:
            json_obj: Dictionary containing historical record data

        Returns:
            HistoricalRecord instance
        """
        dataset_ref = json_obj['dataset']
        if isinstance(dataset_ref, dict):
            dataset_ref = dataset_ref['id']
        return cls(
            id=UUIDEntity.parse_uuid(json_obj['id']),
            dataset=UUIDEntity.parse_uuid(dataset_ref),
            time_range=RDETimeRange(json_obj['start_time'], json_obj['end_time']),
            # NOTE(review): stored as found in the payload, not converted to a
            # ParadataValues member -- confirm whether conversion is expected.
            paradata=json_obj.get('paradata', ''),
            has_observations=json_obj.get('has_observations', []),
            metadata=json_obj.get('metadata', {}),
            rights_attribution=json_obj.get('rights_attribution'),
        )

    def actualize_observations_references(self, entity_list: dict[UUID, RDE]) -> None:
        """Replace observation UUID references with actual Observation objects.

        References that are not strings, or that are absent from
        ``entity_list``, are left unchanged.

        Args:
            entity_list: Dictionary mapping UUIDs to RDE instances
        """
        self.has_observations = [
            entity_list[obs_ref] if isinstance(obs_ref, str) and obs_ref in entity_list else obs_ref
            for obs_ref in self.has_observations
        ]

    def to_dict(self, flatten_metadata: bool = False, exclude_fields=frozenset()) -> dict:
        """Convert to dictionary representation.

        Args:
            flatten_metadata: If True, flatten metadata dict into top-level fields
            exclude_fields: Collection of field names to exclude (none by default)

        Returns:
            Dictionary representation with optional flattened metadata
        """
        result = super().to_dict(exclude_fields=exclude_fields)
        # HR specific serialization for documents
        if 'has_observations' not in exclude_fields:
            result['has_observations'] = [
                obs.get_ref() if isinstance(obs, RDE) else obs
                for obs in self.has_observations
            ]
        if flatten_metadata:
            # Metadata entries overwrite same-named core keys.
            for key, value in self.metadata.items():
                result[key] = value
            result.pop('metadata', None)
        return result

    @classmethod
    def constructor_from_dataframe_row(cls, row: pd.Series) -> Self:
        """Construct a HistoricalRecord from a pandas DataFrame row.

        Automatically extracts metadata fields from columns not used by core
        attributes. The identifier is read from the ``id`` column, falling back
        to ``uuid``. A ``type`` column is neither stored nor copied to metadata.

        Args:
            row: pandas Series representing a row from a DataFrame

        Returns:
            HistoricalRecord instance
        """
        core_columns = {
            'id', 'uuid', 'dataset', 'start_time', 'end_time', 'paradata',
            'type', 'has_observations', 'rights_attribution',
        }
        metadata = {column: row[column] for column in row.index if column not in core_columns}
        raw_id = row['id'] if 'id' in row.index else row['uuid']
        return cls(
            id=UUIDEntity.parse_uuid(raw_id),
            dataset=UUIDEntity.parse_uuid(row['dataset']),
            time_range=RDETimeRange(row['start_time'], row['end_time']),
            paradata=row.get('paradata', ''),
            has_observations=row.get('has_observations', []),
            metadata=metadata,
            rights_attribution=row.get('rights_attribution'),
        )
@dataclass
class HeightInfo:
    """Elevation data attached to a Point of Interest.

    The elevation is split into two independent components, both expressed in
    meters. The information is typically referenced from Maptiler's Database
    and lets the interface place POIs correctly in 3D vision mode.

    Attributes:
        terrain: Height of the terrain in meters, or None when unknown
        building: Height of the building in meters, or None when unknown
    """

    terrain: Optional[float] = field(default=None)
    building: Optional[float] = field(default=None)
@dataclass
class PointOfInterest(RDE, UUIDEntity):
    """A location that one or several observations point to.

    Points of Interest are the coordinate handles used to place observations on
    a map. Several Observations, possibly from different datasets, may refer to
    the same POI; it then acts as an aggregate of everything observed on that
    exact coordinate.

    Attributes:
        id: Universal unique identifier of the POI
        geometry: GPS coordinates of the POI as a Shapely Point
        height: Elevation information (terrain and building height) in meters
    """

    geometry: Point
    height: HeightInfo

    @classmethod
    def constructor_from_json_obj(cls, json_obj: dict) -> Self:
        """Build a PointOfInterest from a plain JSON object or a GeoJSON Feature.

        For a GeoJSON Feature the attributes are looked up in ``properties``;
        otherwise they are looked up in ``json_obj`` itself. The geometry is
        always read from the top-level ``geometry`` entry and is ``None`` when
        that entry is missing or empty.

        Args:
            json_obj: Dictionary containing POI data

        Returns:
            PointOfInterest instance
        """
        raw_geometry = json_obj.get('geometry')
        # A GeoJSON Feature keeps its attributes under "properties".
        attributes = json_obj.get('properties', json_obj)

        poi_id = UUIDEntity.parse_uuid(attributes['id'])
        if raw_geometry:
            location = shapely.from_geojson(json.dumps(raw_geometry))
        else:
            location = None
        elevation = HeightInfo(
            terrain=attributes.get('terrain_height'),
            building=attributes.get('building_height'),
        )
        return cls(id=poi_id, geometry=location, height=elevation)
@dataclass
class Observation(RDE, UUIDEntity):
    """The space-time representation of information recorded in a historical source.

    An Observation is tied to a single point of physical space represented by a
    single latitude and longitude. It can be a physical location (e.g.,
    cadastral parcel) or an event (e.g., apprenticeship). It serves as a pivot
    entity linking Historical Records, Points of Interest, and Geometries.

    A single HR can hold multiple Observations (e.g., a postcard showing
    multiple identified landmarks, where each landmark gets a dedicated
    Observation).

    Attributes:
        id: Universal unique identifier of the observation
        historical_record: Reference to the HR that attests to this observation's existence
        geometry: GPS coordinates of the observation as a Shapely Point
        has_geometries: List of geometry entities tied to this observation
        part_of_point_of_interest: Optional reference to the associated POI
    """

    historical_record: HRReference
    geometry: Point
    has_geometries: list[GeometryReference] = field(default_factory=list)
    part_of_point_of_interest: Optional[POIReference] = None

    def actualize_references(self, entity_list: dict[UUID, RDE]) -> None:
        """Replace UUID references with actual RDE objects.

        Applies to ``has_geometries``, ``part_of_point_of_interest`` and
        ``historical_record``. References that are not strings, or that are
        absent from ``entity_list``, are left unchanged.

        Args:
            entity_list: Dictionary mapping UUIDs to RDE instances
        """
        def resolve(reference):
            if isinstance(reference, str) and reference in entity_list:
                return entity_list[reference]
            return reference

        self.has_geometries = [resolve(geom_ref) for geom_ref in self.has_geometries]
        self.part_of_point_of_interest = resolve(self.part_of_point_of_interest)
        self.historical_record = resolve(self.historical_record)

    @classmethod
    def constructor_from_json_obj(cls, json_obj: dict) -> Self:
        """Construct an Observation from a JSON object.

        Handles both standard JSON and GeoJSON Feature formats. The historical
        record is the first element of the ``documented_in`` list (``None`` when
        that list is missing or empty). The geometry is ``None`` when the
        payload carries no ``geometry`` entry, as for PointOfInterest.

        Args:
            json_obj: Dictionary containing observation data

        Returns:
            Observation instance
        """
        geom = json_obj.get('geometry')
        json_obj = json_obj.get('properties', json_obj)  # in case the JSON object is a GeoJSON Feature object
        documented_in = json_obj.get('documented_in')
        has_record = isinstance(documented_in, list) and len(documented_in) > 0
        return cls(
            id=UUIDEntity.parse_uuid(json_obj['id']),
            historical_record=documented_in[0] if has_record else None,
            # Guarded: from_geojson cannot build a geometry from a missing entry.
            geometry=shapely.from_geojson(json.dumps(geom)) if geom else None,
            has_geometries=json_obj.get('has_geometries', []),
            part_of_point_of_interest=json_obj.get('part_of_point_of_interest', None),
        )
@dataclass
class GeographicalExtent:
    """Bounding box representing the geographical extent of a map or layer.

    Stores the boundary coordinates in [min_x, min_y, max_x, max_y] format,
    representing the spatial limits of a geographical entity.

    Attributes:
        coordinates: List of four float values [min_x, min_y, max_x, max_y]

    Raises:
        AssertionError: If coordinates don't meet format requirements
    """

    coordinates: list[float]

    def __post_init__(self):
        """Check that there are four coordinates and that each min is below its max."""
        # Raised explicitly (instead of `assert`) so the checks survive `python -O`.
        if len(self.coordinates) != 4:
            raise AssertionError("Extent must have four coordinates: [min_x, min_y, max_x, max_y]")
        if not self.coordinates[0] < self.coordinates[2]:
            raise AssertionError("min_x must be less than max_x")
        if not self.coordinates[1] < self.coordinates[3]:
            raise AssertionError("min_y must be less than max_y")
        # Note that the two ordering checks above would fail on maps that lie
        # exactly on the limits of the negative latitude (-0.0) or longitude
        # (-0.0). It is unlikely that we ingest maps from such zones (and most
        # GIS software specifically avoids them:
        # https://en.wikipedia.org/wiki/180th_meridian).
@dataclass
class Map(RDE, UUIDEntity):
    """A group of geographical layers stemming from a single historical map.

    Represents a historical map from which users can freely select layers to
    display in the interface. Each map contains one or more layers (raster or
    vector) that can be toggled on/off.

    Attributes:
        id: Universal unique identifier of the map
        name: Multilingual short title displayed as header in the frontend
        slug: Short string identifying the map (human-readable)
        time_range: Temporal range of the map's existence
        layers: List of layer entities derived from this map
        metadata: List of free-form metadata fields for contextual information
        thumbnail: IIIF protocol URL linking to an image thumbnail
        version: Version label formatted as "X.Y.Z" (major.minor.patch)
        areas: References to areas the map is related to (expected to hold at
            least one; this is not enforced by the class)
        extent: Optional bounding box of the map
    """

    name: MultiLingualValue
    slug: str
    time_range: RDETimeRange
    layers: list[LayerReference] = field(default_factory=list)
    metadata: list[FreeFormMetadata] = field(default_factory=list)
    thumbnail: Optional[str] = None
    version: Optional[str] = None
    areas: list[AreaReference] = field(default_factory=list)
    # Declared last so existing positional constructions keep working. The JSON
    # constructor has always passed `extent=`, which failed without this field.
    extent: Optional[GeographicalExtent] = None

    @classmethod
    def constructor_from_json_obj(cls, json_obj: dict) -> Self:
        """Construct a Map from a JSON object.

        ``id``, ``name``, ``slug``, ``start_time`` and ``end_time`` are
        mandatory keys. The extent is only built when the payload carries a
        non-empty ``extent`` entry.

        Args:
            json_obj: Dictionary containing map data

        Returns:
            Map instance
        """
        extent_coordinates = json_obj.get('extent')
        metadata = [
            FreeFormMetadata(
                type=METADATA_TYPE_TO_ENUM.get(entry['type'], MetadataType.STRING),
                label=MultiLingualValue(values=entry.get('label', {})),
                value=MultiLingualValue(values=entry.get('value', {})),
            )
            for entry in json_obj.get('metadata', [])
        ]
        return cls(
            id=UUIDEntity.parse_uuid(json_obj['id']),
            name=MultiLingualValue(values=json_obj['name']),
            slug=json_obj['slug'],
            time_range=RDETimeRange(json_obj['start_time'], json_obj['end_time']),
            layers=json_obj.get('layers', []),
            metadata=metadata,
            thumbnail=json_obj.get('thumbnail'),
            extent=GeographicalExtent(extent_coordinates) if extent_coordinates else None,
            version=json_obj.get('version'),
            areas=json_obj.get('areas', []),
        )

    def to_dict(self, exclude_fields=frozenset()) -> dict:
        """Convert to dictionary representation.

        The instance is left untouched (``metadata`` and ``layers`` keep holding
        objects), so the method can be called repeatedly. Layers are emitted as
        identifiers. The ``extent`` key is emitted, as a coordinate list, only
        when an extent is set. The ``metadata``, ``layers`` and ``extent`` keys
        are appended after the generically serialized fields.

        Args:
            exclude_fields: Collection of field names to exclude

        Returns:
            Dictionary representation with serialized nested objects
        """
        def layer_ref(layer):
            if isinstance(layer, Layer):
                return layer.get_ref()
            if isinstance(layer, dict) and 'id' in layer:
                return layer['id']
            return layer

        excluded = set(exclude_fields)
        nested = {}
        if 'metadata' not in excluded:
            nested['metadata'] = [
                entry.to_dict() if isinstance(entry, RDE) else entry
                for entry in (self.metadata or [])
            ]
        if 'layers' not in excluded:
            nested['layers'] = [layer_ref(layer) for layer in self.layers]
        if self.extent is not None and 'extent' not in excluded:
            extent = self.extent
            nested['extent'] = extent.coordinates if isinstance(extent, GeographicalExtent) else extent

        # Fields serialized above are hidden from the generic serializer.
        result = super().to_dict(exclude_fields=excluded | {'metadata', 'layers', 'extent'})
        result.update(nested)
        return result
@dataclass
class LayerConfigurationService:
    """Endpoint description used to fetch the tiles of a layer.

    Pairs the geoserver URL serving a layer with the kind of tile service
    exposed at that URL.

    Attributes:
        url: URL on the geoserver that serves the tiles for the layer
        type: Short string of the tile type (MVT, MVTS, XYZ, etc.)
    """

    url: str
    type: str
@dataclass
class LayerConfiguration(RDE, UUIDEntity):
    """Configuration describing how a layer is served and accessed.

    Defines the technical details for how the frontend can access and display
    a layer, including service endpoints, zoom levels, and spatial extent.

    Attributes:
        id: Universal unique identifier of the layer configuration
        service: Service configuration (URL and type) for accessing tiles
        min_zoom_level: Minimum zoom level available for display
        max_zoom_level: Maximum zoom level available for display
        extent: Optional bounding box boundary of the layer
    """

    service: LayerConfigurationService
    min_zoom_level: int
    max_zoom_level: int
    extent: Optional[GeographicalExtent] = None

    @classmethod
    def constructor_from_json_obj(cls, json_obj: dict) -> Self:
        """Construct a LayerConfiguration from a JSON object.

        Missing service details default to empty strings and missing zoom
        levels to 0 and 22. The extent is built only when the payload carries a
        non-empty ``extent`` entry (a JSON ``null`` is treated as absent).

        Args:
            json_obj: Dictionary containing layer configuration data

        Returns:
            LayerConfiguration instance
        """
        service_data = json_obj.get('service', {})
        service = LayerConfigurationService(
            url=service_data.get('url', ''),
            type=service_data.get('type', ''),
        )
        extent_coordinates = json_obj.get('extent')
        return cls(
            id=UUIDEntity.parse_uuid(json_obj['id']),
            service=service,
            min_zoom_level=json_obj.get('min_zoom_level', 0),
            max_zoom_level=json_obj.get('max_zoom_level', 22),
            extent=GeographicalExtent(extent_coordinates) if extent_coordinates else None,
        )

    def to_dict(self, exclude_fields=frozenset()):
        """Convert to dictionary representation.

        The instance is left untouched (``service`` and ``extent`` keep holding
        objects), so the method can be called repeatedly. The ``service`` and
        ``extent`` keys are appended after the generically serialized fields.

        Args:
            exclude_fields: Collection of field names to exclude

        Returns:
            Dictionary representation with serialized service and extent
        """
        excluded = set(exclude_fields)
        nested = {}
        if 'service' not in excluded:
            service = self.service
            if isinstance(service, LayerConfigurationService):
                nested['service'] = {'url': service.url, 'type': service.type}
            else:
                nested['service'] = service
        if 'extent' not in excluded:
            extent = self.extent
            if isinstance(extent, GeographicalExtent):
                nested['extent'] = extent.coordinates
            else:
                nested['extent'] = extent if extent else None

        # Fields serialized above are hidden from the generic serializer.
        result = super().to_dict(excluded | set(nested))
        result.update(nested)
        return result
@dataclass
class Layer(RDE, UUIDEntity):
    """A synthetic derivation from a map (raster or vector).

    Represents an abstraction of objects that users can manipulate to
    display as a 2D planar field in the interface. Can be either a
    vectorization of specific content from a map or the actual digital
    facsimile of the map.

    Attributes:
        id: Universal unique identifier of the layer
        slug: Short string identifying the layer
        name: Multilingual short title displayed in the frontend
        description: Brief multilingual description displayed in overlay choices
        time_range: Temporal range of the layer's existence
        map: Reference to the map this layer is part of
        type: Layer type (RASTER: image tiles, VECTOR: geometry entities)
        layer_configurations: List of configurations for accessing this layer
    """

    slug: str
    name: MultiLingualValue
    description: MultiLingualValue
    time_range: RDETimeRange
    map: MapReference
    type: LayerType
    layer_configurations: list[LayerConfiguration] = field(default_factory=list)

    @classmethod
    def constructor_from_json_obj(cls, json_obj: dict) -> Self:
        """Construct a Layer from a JSON object.

        ``map`` is read from ``json_obj['map']['id']`` when ``'map'`` is a
        dictionary and is ``None`` otherwise. An absent or unrecognised
        ``'type'`` falls back to ``LayerType.RASTER``.

        Args:
            json_obj: Dictionary containing layer data

        Returns:
            Layer instance

        Raises:
            KeyError: If ``'id'``, ``'slug'``, ``'name'``, ``'start_time'``
                or ``'end_time'`` is missing from ``json_obj``
        """
        map_data = json_obj.get('map')
        map_ref = (
            UUIDEntity.parse_uuid(map_data['id'])
            if isinstance(map_data, dict)
            else None
        )
        return cls(
            id=UUIDEntity.parse_uuid(json_obj['id']),
            slug=json_obj['slug'],
            name=MultiLingualValue(values=json_obj['name']),
            description=MultiLingualValue(values=json_obj.get('description', {})),
            time_range=RDETimeRange(json_obj['start_time'], json_obj['end_time']),
            map=map_ref,
            type=LAYER_TYPE_TO_ENUM.get(
                json_obj.get('type', '').upper(), LayerType.RASTER
            ),
            layer_configurations=[
                LayerConfiguration.constructor_from_json_obj(lc)
                for lc in json_obj.get('layer_configurations', [])
            ],
        )

    def to_dict(self, exclude_fields=None):
        """Convert to dictionary representation.

        Each entry of ``layer_configurations`` is serialized with its own
        ``to_dict()``. The serialized list is only present on the instance
        while the parent implementation runs; the original list is restored
        afterwards.

        Args:
            exclude_fields: Set of field names to exclude. ``None`` (the
                default) is forwarded to the parent as an empty collection.

        Returns:
            Dictionary representation with serialized layer configurations
        """
        if exclude_fields is None:
            exclude_fields = {}

        original_configurations = self.layer_configurations
        serialized_configurations = (
            [lc.to_dict() for lc in original_configurations]
            if original_configurations
            else []
        )

        try:
            self.layer_configurations = serialized_configurations
            # Forward the exclusions; dropping them here would make the
            # parameter a silent no-op.
            return super().to_dict(exclude_fields)
        finally:
            self.layer_configurations = original_configurations
@dataclass
class Geometry(RDE, UUIDEntity):
    """Mathematical representation of a physical location as GPS coordinates.

    Represents geographical areas tied to an Observation and Historical
    Record. Can represent parcels, buildings, streets, courtyards, parishes,
    or any arbitrary zone. Can also exist without being referenced by a
    record, existing only as part of a vector Layer.

    Attributes:
        id: Universal unique identifier of the geometry
        geometry: Shapely geometry object (Point, LineString, Polygon, Multi*)
        part_of_layer: Optional reference to the layer this geometry belongs to

    Raises:
        ValueError: If the geometry is not valid according to Shapely validation
    """

    geometry: GeometryType
    part_of_layer: Optional[LayerReference] = None

    def __post_init__(self):
        """Normalize a GeoJSON-dict geometry to Shapely and validate it."""
        # Accept a GeoJSON geometry dictionary in place of a Shapely object.
        if isinstance(self.geometry, dict):
            self.geometry = shapely.from_geojson(json.dumps(self.geometry))
        if not self.geometry.is_valid:
            raise ValueError(f'Invalid geometry, because {shapely.validation.explain_validity(self.geometry)}')

    @classmethod
    def constructor_from_json_obj(cls, json_obj: dict) -> Self:
        """Construct a Geometry from a JSON object.

        Handles GeoJSON Feature format where properties and geometry are
        separate. The identifier is taken from ``properties['id']`` and
        falls back to the top-level ``'id'``.

        Args:
            json_obj: Dictionary containing geometry data (typically GeoJSON Feature)

        Returns:
            Geometry instance
        """
        # A GeoJSON Feature may carry `"properties": null`; treat that the
        # same as an absent properties member.
        props = json_obj.get('properties') or {}
        geometry = json_obj.get('geometry', {})
        return cls(
            id=UUIDEntity.parse_uuid(props.get('id', json_obj.get('id'))),
            part_of_layer=(
                UUIDEntity.parse_uuid(props.get('part_of_layer'))
                if 'part_of_layer' in props
                else None
            ),
            geometry=shapely.from_geojson(json.dumps(geometry)),
        )

    @classmethod
    def constructor_from_raw_geojson_line(cls, geojson_line: str, uuid: str, layer_uuid: str) -> Self:
        """Construct a Geometry from a raw GeoJSON line string.

        Used for processing GeoJSON line-delimited files.

        Args:
            geojson_line: String containing a single GeoJSON object
            uuid: UUID to assign to this geometry
            layer_uuid: UUID of the layer this geometry belongs to

        Returns:
            Geometry instance
        """
        json_obj = json.loads(geojson_line)
        return cls(
            id=UUIDEntity.parse_uuid(uuid),
            # The dataclass field is `part_of_layer`; there is no
            # `has_layer` field to receive the layer reference.
            part_of_layer=UUIDEntity.parse_uuid(layer_uuid),
            geometry=shapely.from_geojson(json.dumps(json_obj.get('geometry', {}))),
        )

    def to_dict(self, exclude_fields=None) -> dict:
        """Convert to dictionary representation.

        Args:
            exclude_fields: Optional set of field names to exclude. When
                omitted, the parent implementation is called without
                arguments. When it contains ``'geometry'``, the GeoJSON
                geometry is not added to the result.

        Returns:
            Dictionary representation with geometry as GeoJSON dict
        """
        if exclude_fields is None:
            result = super().to_dict()
        else:
            result = super().to_dict(exclude_fields)

        # Replace the Shapely object with its GeoJSON dictionary, unless the
        # caller asked for the field to be left out.
        if exclude_fields is None or 'geometry' not in exclude_fields:
            result['geometry'] = json.loads(shapely.to_geojson(self.geometry))
        return result
@dataclass
class Area(RDE, UUIDEntity):
    """The boundary of a specific geographical entity.

    Represents geographical boundaries of continents, countries, cities, or
    ad-hoc administrative zones. Used to index maps and datasets to curated
    areas in the Time Atlas, enabling spatial filtering and organization.

    Attributes:
        id: Universal unique identifier of the area
        slug: Short string identifying the area (human-readable)
        name: Multilingual name of the geographical entity
        geometry: Shapely geometry object representing the boundary (typically Polygon)
    """

    slug: str
    name: MultiLingualValue
    geometry: GeometryType

    @classmethod
    def constructor_from_json_obj(cls, json_obj: dict) -> Self:
        """Construct an Area from a JSON object.

        Args:
            json_obj: Dictionary containing area data

        Returns:
            Area instance

        Raises:
            KeyError: If ``'id'``, ``'slug'`` or ``'name'`` is missing from
                ``json_obj``
        """
        return cls(
            id=UUIDEntity.parse_uuid(json_obj['id']),
            # `slug` is a required field of the dataclass; omitting it makes
            # the constructor call fail with a TypeError.
            slug=json_obj['slug'],
            name=MultiLingualValue(values=json_obj['name']),
            geometry=shapely.from_geojson(json.dumps(json_obj.get('geometry', {}))),
        )