# Source code for jdex.config

from __future__ import annotations

from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Literal
from jdex.owl.reasoning import PresetAxioms


# Type alias for the description logic profile setting: one of the literal
# strings "EL", "ALC", "SROIQ", or None.
DLProfile = Literal["EL", "ALC", "SROIQ", None]

@dataclass
class PathsConfig:
    """Locations of the pipeline's input files and output directory.

    Attributes:
        schema (Path): Path to the ontology/schema file.
        data (Path): Path to the dataset/triples file.
        output (Path): Directory where processed outputs will be stored.
    """

    schema: Path
    data: Path
    output: Path

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "PathsConfig":
        """Build a PathsConfig from a mapping of path values.

        Args:
            data (dict[str, Any]): Mapping that must contain the keys
                "schema", "data" and "output"; each value is wrapped in
                ``Path``.

        Returns:
            PathsConfig: Instance whose three attributes are ``Path`` objects.

        Raises:
            KeyError: If one of the three required keys is missing.
        """
        # Keys are read in declaration order so a missing key is reported
        # in a predictable sequence.
        schema_path = Path(data["schema"])
        data_path = Path(data["data"])
        output_path = Path(data["output"])
        return cls(schema=schema_path, data=data_path, output=output_path)


@dataclass
class MaterializationConfig:
    """Configuration for TBox materialization reasoning.

    Attributes:
        enabled (bool): Whether materialization is enabled.
        axioms (list[str]): List of axioms to materialize. Defaults to the
            list returned by ``PresetAxioms.tbox_materialization()``.
        reasoner (str): Reasoner used for materialization ("hermit" or "elk").

    Raises:
        ValueError: If unsupported axioms or reasoner are provided.
    """

    enabled: bool = True
    axioms: list[str] = field(default_factory=PresetAxioms.tbox_materialization)
    reasoner: str = "hermit"

    def __post_init__(self) -> None:
        """Validate axioms and reasoner.

        Raises:
            ValueError: If any entry of ``axioms`` is not among the values
                returned by ``PresetAxioms.tbox_materialization()``, or if
                ``reasoner`` is neither "hermit" nor "elk".
        """
        allowed_axioms = set(PresetAxioms.tbox_materialization())
        invalid_axioms = [axiom for axiom in self.axioms if axiom not in allowed_axioms]
        if invalid_axioms:
            raise ValueError(
                f"Unsupported axioms: {invalid_axioms}. "
                f"Allowed axioms are: {sorted(allowed_axioms)}"
            )

        allowed_reasoners = ["hermit", "elk"]
        if self.reasoner not in allowed_reasoners:
            raise ValueError(f"Unsupported reasoner {self.reasoner}")

    # NOTE: a second, undocumented definition of ``from_dict`` used to follow
    # this one and silently replaced it at class-creation time. Both bodies
    # were equivalent, so only this documented definition is kept.
    @classmethod
    def from_dict(cls, data: dict[str, Any] | None) -> "MaterializationConfig":
        """Create a MaterializationConfig from a dictionary.

        Args:
            data (dict[str, Any] | None): Configuration dictionary. ``None``
                or an empty dictionary yields the default configuration.

        Returns:
            MaterializationConfig: Initialized instance.

        Raises:
            ValueError: If the resulting axioms or reasoner fail validation.
        """
        data = data or {}
        return cls(
            enabled=data.get("enabled", True),
            # Copy into a fresh list so the instance never aliases the
            # caller's (or the preset's) list object.
            axioms=list(data.get("axioms", PresetAxioms.tbox_materialization())),
            reasoner=data.get("reasoner", "hermit"),
        )


@dataclass
class RealizationConfig:
    """Settings for the realization reasoning step.

    Attributes:
        enabled (bool): Whether realization is enabled.
        reasoner (str): Reasoner used ("hermit", "konclude", or "elk").

    Raises:
        ValueError: If an unsupported reasoner is provided.
    """

    enabled: bool = True
    reasoner: str = "konclude"

    def __post_init__(self) -> None:
        """Reject any reasoner outside the supported ones."""
        if self.reasoner in ("hermit", "konclude", "elk"):
            return
        raise ValueError(f"Unsupported reasoner {self.reasoner}")

    @classmethod
    def from_dict(cls, data: dict[str, Any] | None) -> "RealizationConfig":
        """Build a RealizationConfig from an optional dictionary.

        Args:
            data (dict[str, Any] | None): Configuration dictionary; ``None``
                or an empty dictionary falls back to the defaults.

        Returns:
            RealizationConfig: Initialized instance.
        """
        options = data if data else {}
        enabled = options.get("enabled", True)
        reasoner = options.get("reasoner", "konclude")
        return cls(enabled=enabled, reasoner=reasoner)
    

@dataclass
class ModularizationConfig:
    """Settings for the ontology modularization step.

    Attributes:
        enabled (bool): Whether modularization is enabled.
    """

    enabled: bool = True

    @classmethod
    def from_dict(cls, data: dict[str, Any] | None) -> "ModularizationConfig":
        """Build a ModularizationConfig from an optional dictionary.

        Args:
            data (dict[str, Any] | None): Configuration dictionary; ``None``
                or an empty dictionary falls back to ``enabled=True``.

        Returns:
            ModularizationConfig: Initialized instance.
        """
        if not data:
            # Nothing supplied: same result as looking up a missing key.
            return cls(enabled=True)
        return cls(enabled=data.get("enabled", True))
    

@dataclass
class ConsistencyConfig:
    """Settings for the consistency checking step.

    Attributes:
        convert_ntriples (bool): Whether to convert data to N-Triples before checking.
    """

    convert_ntriples: bool = False

    @classmethod
    def from_dict(cls, data: dict[str, Any] | None) -> "ConsistencyConfig":
        """Build a ConsistencyConfig from an optional dictionary.

        Args:
            data (dict[str, Any] | None): Configuration dictionary; ``None``
                or an empty dictionary falls back to ``convert_ntriples=False``.

        Returns:
            ConsistencyConfig: Initialized instance.
        """
        if not data:
            # Nothing supplied: same result as looking up a missing key.
            return cls(convert_ntriples=False)
        return cls(convert_ntriples=data.get("convert_ntriples", False))
    

@dataclass
class DecompositionConfig:
    """Settings for the ontology decomposition step.

    Attributes:
        tbox (bool): Whether to decompose the TBox.
        rbox (bool): Whether to decompose the RBox.
    """

    tbox: bool = True
    rbox: bool = True

    @classmethod
    def from_dict(cls, data: dict[str, Any] | None) -> "DecompositionConfig":
        """Build a DecompositionConfig from an optional dictionary.

        Args:
            data (dict[str, Any] | None): Configuration dictionary; ``None``
                or an empty dictionary falls back to the defaults.

        Returns:
            DecompositionConfig: Initialized instance.
        """
        options = data if data else {}
        decompose_rbox = options.get("rbox", True)
        decompose_tbox = options.get("tbox", True)
        return cls(rbox=decompose_rbox, tbox=decompose_tbox)

@dataclass
class SatisfiabilityConfig:
    """Settings for the satisfiability checking step.

    Attributes:
        filter_unsatisfiable (bool): Whether to remove unsatisfiable entities.
        reasoner (str): Reasoner used ("hermit" or "elk").

    Raises:
        ValueError: If an unsupported reasoner is provided.
    """

    filter_unsatisfiable: bool = False
    reasoner: str = "hermit"

    def __post_init__(self) -> None:
        """Reject any reasoner outside the supported ones."""
        if self.reasoner in ("hermit", "elk"):
            return
        raise ValueError(f"Unsupported reasoner {self.reasoner}")

    @classmethod
    def from_dict(cls, data: dict[str, Any] | None) -> "SatisfiabilityConfig":
        """Build a SatisfiabilityConfig from an optional dictionary.

        Args:
            data (dict[str, Any] | None): Configuration dictionary; ``None``
                or an empty dictionary falls back to the defaults.

        Returns:
            SatisfiabilityConfig: Initialized instance.
        """
        options = data if data else {}
        drop_unsatisfiable = options.get("filter_unsatisfiable", False)
        chosen_reasoner = options.get("reasoner", "hermit")
        return cls(filter_unsatisfiable=drop_unsatisfiable, reasoner=chosen_reasoner)
    

@dataclass
class ReasoningConfig:
    """Configuration for all reasoning services.

    Attributes:
        java_8_home (str | None): Path to Java 8 installation.
        java_11_home (str | None): Path to Java 11 installation.
        java_max_ram (int): Maximum RAM (GB) allocated to Java.
        satisfiability (SatisfiabilityConfig): Satisfiability settings.
        materialization (MaterializationConfig): Materialization settings.
        realization (RealizationConfig): Realization settings.
        modularization (ModularizationConfig): Modularization settings.
        decomposition (DecompositionConfig): Decomposition settings.
        consistency (ConsistencyConfig): Consistency settings.
    """

    java_8_home: str | None = None
    java_11_home: str | None = None
    java_max_ram: int = 4
    satisfiability: SatisfiabilityConfig = field(default_factory=SatisfiabilityConfig)
    materialization: MaterializationConfig = field(default_factory=MaterializationConfig)
    realization: RealizationConfig = field(default_factory=RealizationConfig)
    modularization: ModularizationConfig = field(default_factory=ModularizationConfig)
    decomposition: DecompositionConfig = field(default_factory=DecompositionConfig)
    consistency: ConsistencyConfig = field(default_factory=ConsistencyConfig)

    @classmethod
    def from_dict(cls, data: dict[str, Any] | None) -> "ReasoningConfig":
        """Create a ReasoningConfig from a dictionary.

        Args:
            data (dict[str, Any] | None): Configuration dictionary. ``None``
                or an empty dictionary yields the default configuration.
                Each nested section is handed to the ``from_dict`` of its
                own config class.

        Returns:
            ReasoningConfig: Initialized instance.
        """
        data = data or {}
        return cls(
            java_max_ram=data.get("java_max_ram", 4),
            # Missing, None and other falsy values (e.g. "") all become None.
            java_8_home=data.get("java_8_home") or None,
            java_11_home=data.get("java_11_home") or None,
            materialization=MaterializationConfig.from_dict(data.get("materialization")),
            realization=RealizationConfig.from_dict(data.get("realization")),
            modularization=ModularizationConfig.from_dict(data.get("modularization")),
            decomposition=DecompositionConfig.from_dict(data.get("decomposition")),
            consistency=ConsistencyConfig.from_dict(data.get("consistency")),
            satisfiability=SatisfiabilityConfig.from_dict(data.get("satisfiability")),
        )
@dataclass
class TestLeakageFilteringConfig:
    """Configuration for test leakage filtering.

    Attributes:
        enabled (bool): Whether filtering is enabled.
        minimum_frequency (float): Threshold for filtering leakage.
    """

    # The class name starts with "Test", so pytest would try to collect it
    # when it is imported into a test module; opt out explicitly. This is a
    # plain class attribute (no annotation), not a dataclass field.
    __test__ = False

    enabled: bool = True
    minimum_frequency: float = 0.97

    @classmethod
    def from_dict(cls, data: dict[str, Any] | None) -> "TestLeakageFilteringConfig":
        """Create a TestLeakageFilteringConfig from a dictionary.

        Args:
            data (dict[str, Any] | None): Configuration dictionary. ``None``
                or an empty dictionary yields the default configuration.

        Returns:
            TestLeakageFilteringConfig: Initialized instance.
        """
        data = data or {}
        return cls(
            enabled=data.get("enabled", True),
            minimum_frequency=data.get("minimum_frequency", 0.97),
        )


@dataclass
class SplitConfig:
    """Configuration for dataset splitting.

    Attributes:
        enabled (bool): Whether splitting is enabled.
        train_percent (int): Percentage of training data.
        validation_percent (int): Percentage of validation data.
        test_percent (int): Percentage of test data.
        transductive (bool): Whether to use transductive splitting.
        test_leakage_filtering (TestLeakageFilteringConfig): Leakage filtering settings.

    Raises:
        ValueError: If percentages do not sum to 100 when enabled.
    """

    enabled: bool = True
    train_percent: int = 80
    validation_percent: int = 10
    test_percent: int = 10
    transductive: bool = True
    test_leakage_filtering: TestLeakageFilteringConfig = field(
        default_factory=TestLeakageFilteringConfig
    )

    def __post_init__(self) -> None:
        """Validate split percentages.

        The check is skipped entirely when splitting is disabled.

        Raises:
            ValueError: If splitting is enabled and the three percentages do
                not sum to 100.
        """
        if self.enabled:
            total = self.train_percent + self.validation_percent + self.test_percent
            if total != 100:
                raise ValueError(f"Split percentages must sum to 100, got {total}.")

    @classmethod
    def from_dict(cls, data: dict[str, Any] | None) -> "SplitConfig":
        """Create a SplitConfig from a dictionary.

        Args:
            data (dict[str, Any] | None): Configuration dictionary. ``None``
                or an empty dictionary yields the default configuration.

        Returns:
            SplitConfig: Initialized instance.

        Raises:
            ValueError: If the resulting percentages fail validation.
        """
        data = data or {}
        return cls(
            enabled=data.get("enabled", True),
            train_percent=data.get("train_percent", 80),
            validation_percent=data.get("validation_percent", 10),
            test_percent=data.get("test_percent", 10),
            transductive=data.get("transductive", True),
            test_leakage_filtering=TestLeakageFilteringConfig.from_dict(
                data.get("test_leakage_filtering")
            ),
        )


@dataclass
class PostProcessingConfig:
    """Configuration for post-processing steps.

    Attributes:
        json_conversion (bool): Whether to export JSON files.
        id_mapping (bool): Whether to generate ID mappings.
        tsv_conversion (bool): Whether to export TSV files.
    """

    json_conversion: bool = True
    id_mapping: bool = True
    tsv_conversion: bool = True

    @classmethod
    def from_dict(cls, data: dict[str, Any] | None) -> "PostProcessingConfig":
        """Create a PostProcessingConfig from a dictionary.

        Args:
            data (dict[str, Any] | None): Configuration dictionary. ``None``
                or an empty dictionary yields the default configuration.

        Returns:
            PostProcessingConfig: Initialized instance.
        """
        data = data or {}
        return cls(
            json_conversion=data.get("json_conversion", True),
            id_mapping=data.get("id_mapping", True),
            tsv_conversion=data.get("tsv_conversion", True),
        )
@dataclass
class JDEXConfig:
    """Top-level configuration for JDEX pipeline.

    Attributes:
        dataset_name (str): Name of the dataset.
        paths (PathsConfig): Input/output paths configuration.
        verbose (int): Verbosity level.
        interactive_shell (bool): Whether to enable interactive mode.
        reasoning (ReasoningConfig): Reasoning configuration.
        test_leakage_filtering (TestLeakageFilteringConfig): Global leakage filtering.
        split (SplitConfig): Dataset splitting configuration.
        description_logic_profile (DLProfile): DL profile ("EL", "ALC", "SROIQ", or None).
        post_processing (PostProcessingConfig): Post-processing configuration.

    Raises:
        ValueError: If an invalid description logic profile is provided.
    """

    dataset_name: str
    paths: PathsConfig
    verbose: int = 1
    interactive_shell: bool = True
    reasoning: ReasoningConfig = field(default_factory=ReasoningConfig)
    test_leakage_filtering: TestLeakageFilteringConfig = field(
        default_factory=TestLeakageFilteringConfig
    )
    split: SplitConfig = field(default_factory=SplitConfig)
    description_logic_profile: DLProfile = None
    post_processing: PostProcessingConfig = field(default_factory=PostProcessingConfig)

    def __post_init__(self) -> None:
        """Validate description logic profile.

        Raises:
            ValueError: If ``description_logic_profile`` is not "EL", "ALC",
                "SROIQ" or None.
        """
        # A tuple rather than a set: membership is then decided by equality,
        # so an unhashable value (e.g. a list read from a config file) is
        # rejected with the ValueError below instead of a TypeError.
        allowed_profiles = ("EL", "ALC", "SROIQ", None)
        if self.description_logic_profile not in allowed_profiles:
            raise ValueError(
                "description_logic_profile must be one of: "
                "'EL', 'ALC', 'SROIQ', or None."
            )

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "JDEXConfig":
        """Create a JDEXConfig from a dictionary.

        Args:
            data (dict[str, Any]): Configuration dictionary. The keys
                "dataset_name" and "paths" are required; every other section
                is optional and falls back to its defaults.

        Returns:
            JDEXConfig: Initialized configuration object.

        Raises:
            KeyError: If "dataset_name" or "paths" is missing.
            ValueError: If the description logic profile is invalid.
        """
        return cls(
            dataset_name=data["dataset_name"],
            verbose=data.get("verbose", 1),
            interactive_shell=data.get("interactive_shell", True),
            paths=PathsConfig.from_dict(data["paths"]),
            reasoning=ReasoningConfig.from_dict(data.get("reasoning")),
            test_leakage_filtering=TestLeakageFilteringConfig.from_dict(
                data.get("test_leakage_filtering")
            ),
            split=SplitConfig.from_dict(data.get("split")),
            description_logic_profile=data.get("description_logic_profile"),
            post_processing=PostProcessingConfig.from_dict(
                data.get("post_processing")
            ),
        )

    def pretty_print(self) -> str:
        """Return a formatted string summarizing the configuration.

        Despite the name, nothing is printed; the summary is returned.

        Returns:
            str: Human-readable configuration summary, one setting per line,
            with nested sections indented by two spaces per level.
        """
        reasoning = self.reasoning
        split = self.split
        # NOTE(review): the summary reports split.test_leakage_filtering; the
        # top-level test_leakage_filtering field is not shown. Confirm that
        # this is intentional.
        leakage = split.test_leakage_filtering
        post = self.post_processing

        lines = [
            "JDEX Configuration Summary",
            "--------------------------",
            "",
            "GENERAL SETTINGS",
            f"  dataset_name: {self.dataset_name}",
            f"  verbose: {self.verbose}",
            f"  interactive_shell: {self.interactive_shell}",
            "",
            "PATHS SETTINGS",
            f"  schema: {self.paths.schema}",
            f"  data: {self.paths.data}",
            f"  output: {self.paths.output}",
            "",
            "REASONING SERVICES SETTINGS",
            f"  java_8_home: {reasoning.java_8_home}",
            f"  java_11_home: {reasoning.java_11_home}",
            f"  java_max_ram: {reasoning.java_max_ram}",
            "  consistency:",
            f"    convert_ntriples: {reasoning.consistency.convert_ntriples}",
            "  satisfiability:",
            f"    filter_unsatisfiable: {reasoning.satisfiability.filter_unsatisfiable}",
            f"    reasoner: {reasoning.satisfiability.reasoner}",
            "  materialization:",
            f"    enabled: {reasoning.materialization.enabled}",
            f"    axioms: {reasoning.materialization.axioms}",
            f"    reasoner: {reasoning.materialization.reasoner}",
            "  realization:",
            f"    enabled: {reasoning.realization.enabled}",
            f"    reasoner: {reasoning.realization.reasoner}",
            "  modularization:",
            f"    enabled: {reasoning.modularization.enabled}",
            "  decomposition:",
            f"    tbox: {reasoning.decomposition.tbox}",
            f"    rbox: {reasoning.decomposition.rbox}",
            f"  description_logic_profile: {self.description_logic_profile}",
            "",
            "MACHINE LEARNING SETTINGS",
            "  split:",
            f"    enabled: {split.enabled}",
            f"    train_percent: {split.train_percent}",
            f"    validation_percent: {split.validation_percent}",
            f"    test_percent: {split.test_percent}",
            f"    transductive: {split.transductive}",
            "    test_leakage_filtering:",
            f"      enabled: {leakage.enabled}",
            f"      minimum_frequency: {leakage.minimum_frequency}",
            "  post_processing:",
            f"    json_conversion: {post.json_conversion}",
            f"    id_mapping: {post.id_mapping}",
            f"    tsv_conversion: {post.tsv_conversion}",
        ]
        return "\n".join(lines)