Skip to content

How to add your own schemes

idutils.ext

Extension class to collect and register new schemes via entrypoints.

In order to define your own custom schemes you can use the following entrypoint to register them:

[options.entry_points]
idutils.custom_schemes =
    my_new_scheme = my_module.get_scheme_config_func

The entry point 'my_new_scheme = my_module.get_scheme_config_func' defines an entry point named my_new_scheme pointing to the function my_module.get_scheme_config_func which returns the config for your new registered scheme.

That function must return a dictionary with the following format:

def get_scheme_config_func():
    return {
        # See examples in `idutils.validators` file.
        "validator": lambda value: True if is_valid(value) else False,
        # Used in `idutils.normalizers.normalize_pid` function.
        "normalizer": lambda value: normalized_value,
        # See examples in `idutils.detectors.IDUTILS_SCHEME_FILTER` config.
        "filter": ["list_of_schemes_to_filter_out"],
        # Used in `idutils.normalizers.to_url` function.
        "url_generator": lambda scheme, normalized_pid: "normalized_url",
    }

Each key is optional; if a key is not provided, a default value is set by the idutils.ext._set_default_custom_scheme_config() function.

Note

You can only add new schemes; you cannot override existing ones.

CustomSchemesRegistry

Singleton class for loading and storing custom schemes from entry points.

Source code in idutils/ext.py
class CustomSchemesRegistry:
    """Singleton class for loading and storing custom schemes from entry points.

    The single instance is created on first instantiation. At that point every
    entry point of the ``idutils.custom_schemes`` group is loaded into an
    internal dictionary that maps the scheme name to its configuration.
    """

    _instance = None
    _lock = Lock()  # To ensure thread-safe singleton creation

    def __new__(cls):
        """Create the singleton on the first call and return it on every call.

        The instance is published on the class only after all entry points
        were loaded successfully, so a failing entry point never leaves a
        partially filled registry behind as the shared instance.

        Raises ``AssertionError`` if a custom scheme is invalid (see
        ``_load_entry_points``).
        """
        with cls._lock:
            if cls._instance is None:
                instance = super().__new__(cls)
                # Internal dictionary to store schemes
                instance._custom_schemes_registry = {}
                instance._load_entry_points("idutils.custom_schemes")
                # Publish only once loading succeeded.
                cls._instance = instance
        return cls._instance

    @property
    def custom_schemes(self):
        """Return the registered custom schemes.

        Each item of the registry is of the format:
            {
                "custom_scheme": {

                    # See examples in `idutils.validators` file.
                    "validator": lambda value: True if is_valid(value) else False,
                    # Used in `idutils.normalizers.normalize_pid` function.
                    "normalizer": lambda value: normalized_value,
                    # See examples in `idutils.detectors.IDUTILS_SCHEME_FILTER` config.
                    "filter": ["list_of_schemes_to_filter_out"],
                    # Used in `idutils.normalizers.to_url` function.
                    "url_generator": lambda scheme, normalized_pid: "normalized_url"

                }

            }

        """
        return self._custom_schemes_registry

    def pick_scheme_key(self, key):
        """Serialize the registered custom schemes by key.

        Return a list of tuples [(<scheme_name>, <scheme_config_key_value>)]

        Raises ``KeyError`` if a registered scheme config has no ``key`` entry.
        """
        return [(scheme, config[key]) for scheme, config in self.custom_schemes.items()]

    def _load_entry_points(self, ep_name):
        """Load the entry points of the group ``ep_name`` into the internal registry.

        Raises ``AssertionError`` if an entry point is named like an existing
        scheme of ``IDUTILS_PID_SCHEMES`` or if it does not load to a callable.
        """
        existing_id_names = {scheme[0] for scheme in IDUTILS_PID_SCHEMES}

        # Load entry points from the specified group
        for ep in set(entry_points(group=ep_name)):
            name = ep.name

            # Ensure no custom scheme overrides existing ones. Raised
            # explicitly instead of using `assert`, because assert statements
            # are removed when Python runs with -O and the check would vanish.
            if name in existing_id_names:
                raise AssertionError(f"Scheme {name} already exists!")

            # Load the function from entry point
            scheme_register_func = ep.load()
            if not callable(scheme_register_func):
                raise AssertionError(f"{name} must be callable!")

            # Call the function to get the scheme config
            scheme_config = scheme_register_func()

            # Set default config values if needed
            scheme_config = _set_default_custom_scheme_config(scheme_config)

            # Store in the registry; an already stored name is kept as is
            self._custom_schemes_registry.setdefault(name, scheme_config)

custom_schemes property

Return the registered custom schemes.

Each item of the registry is of the format

{ "custom_scheme": {

    # See examples in `idutils.validators` file.
    "validator": lambda value: True if is_valid(value) else False,
    # Used in `idutils.normalizers.normalize_pid` function.
    "normalizer": lambda value: normalized_value,
    # See examples in `idutils.detectors.IDUTILS_SCHEME_FILTER` config.
    "filter": ["list_of_schemes_to_filter_out"],
    # Used in `idutils.normalizers.to_url` function.
    "url_generator": lambda scheme, normalized_pid: "normalized_url"

}

}

pick_scheme_key(key)

Serialize the registered custom schemes by key.

Return a list of tuples [(<scheme_name>, <scheme_config_key_value>)]

Source code in idutils/ext.py
def pick_scheme_key(self, key):
    """Collect the value stored under ``key`` for every registered custom scheme.

    Return a list of tuples [(<scheme_name>, <scheme_config_key_value>)]
    """
    picked = []
    for scheme_name, scheme_config in self.custom_schemes.items():
        picked.append((scheme_name, scheme_config[key]))
    return picked

entry_points(group)

Entry points.

Copied here from invenio-base so that we do not introduce a dependency on invenio-base.

Source code in idutils/ext.py
def entry_points(group):
    """Return the entry points registered under ``group``.

    Copied here from invenio-base so that we do not introduce a dependency on invenio-base.
    """
    # Python >= 3.10 can select the group directly via the keyword argument.
    if version_info >= (3, 10):
        return m.entry_points(group=group)

    all_eps = m.entry_points()
    # The isinstance check below exists only to simplify the tests: they are
    # written against python >= 3.10 and patch the return value of
    # importlib.metadata.entry_points with a list, which works with the group
    # keyword but not on 3.9. Such a non-dict value is returned untouched.
    if not isinstance(all_eps, dict):
        return all_eps

    # The value returned by .get can contain duplicates; set() removes them
    # and list() keeps the return type a list.
    return list(set(all_eps.get(group, [])))