Skip to content

ftmq.util

clean_name(value) cached

Clean a value and only return it if it is a "name", in the sense that it doesn't consist exclusively of special characters

Examples:

>>> clean_name("  foo\n Bar")
"foo Bar"
>>> clean_name("- - . *")
None

Args: value: Any input that will be cleaned

Returns: The cleaned name or None

Source code in ftmq/util.py
@lru_cache(1024)
def clean_name(value: Any) -> str | None:
    """
    Normalize a value and hand it back only if it qualifies as a "name", i.e.
    it still slugifies to something (it doesn't consist exclusively of special
    characters)

    Examples:
        >>> clean_name("  foo\n Bar")
        "foo Bar"
        >>> clean_name("- - . *")
        None

    Args:
        value: Any input that will be [cleaned][ftmq.util.clean_string]

    Returns:
        The cleaned name, or `None` if `slugify` yields `None` for it
    """
    cleaned = clean_string(value)
    # A value that slugifies to nothing carries no usable name content.
    return None if slugify(cleaned) is None else cleaned

clean_string(value) cached

Convert a value to None or a sanitized string without linebreaks

Examples:

>>> clean_string(" foo\n bar")
"foo bar"
>>> clean_string("foo Bar, baz")
"foo Bar, baz"
>>> clean_string(None)
None
>>> clean_string("")
None
>>> clean_string("  ")
None
>>> clean_string(100)
"100"

Args: value: Any input that will be converted to string

Returns: The cleaned value or None

Source code in ftmq/util.py
@lru_cache(1024)
def clean_string(value: Any) -> str | None:
    """
    Turn any value into a sanitized, single-line string, or `None` if nothing
    usable remains

    Examples:
        >>> clean_string(" foo\n bar")
        "foo bar"
        >>> clean_string("foo Bar, baz")
        "foo Bar, baz"
        >>> clean_string(None)
        None
        >>> clean_string("")
        None
        >>> clean_string("  ")
        None
        >>> clean_string(100)
        "100"

    Args:
        value: Any input that will be converted to string

    Returns:
        The sanitized value with whitespace collapsed, or `None`
    """
    text = sanitize_text(value)
    if text is not None:
        # Whitespace runs (including linebreaks) are collapsed in a second step.
        return collapse_spaces(text)
    return None

get_country_code(value, splitter=',') cached

Get the 2-letter iso country code for an arbitrary country name

Examples:

>>> get_country_code("Germany")
"de"
>>> get_country_code("Deutschland")
"de"
>>> get_country_code("Berlin, Deutschland")
"de"
>>> get_country_code("Foo")
None

Parameters:

Name Type Description Default
value Any

Any input that will be cleaned

required
splitter str | None

Character to use to get text tokens to find country name for

','

Returns:

Type Description
str | None

The iso code or None

Source code in ftmq/util.py
@lru_cache(1024)
def get_country_code(value: Any, splitter: str | None = ",") -> str | None:
    """
    Resolve an arbitrary country name to its 2-letter iso country code

    The whole (cleaned) text is looked up first; if that fails, the text is
    split on `splitter` and the first token that resolves wins.

    Examples:
        >>> get_country_code("Germany")
        "de"
        >>> get_country_code("Deutschland")
        "de"
        >>> get_country_code("Berlin, Deutschland")
        "de"
        >>> get_country_code("Foo")
        None

    Args:
        value: Any input that will be [cleaned][ftmq.util.clean_string]
        splitter: Character to use to get text tokens to find country name for

    Returns:
        The iso code or `None`
    """
    text = clean_string(value)
    if not text:
        return None
    lookup = registry.country.clean_text
    whole = lookup(text)
    if whole:
        return whole
    # The split happens only after the whole-text lookup failed; tokens are
    # then tried lazily, in order, stopping at the first one that resolves.
    matches = (code for code in map(lookup, text.split(splitter)) if code)
    return next(matches, None)

get_country_name(code) cached

Get the (english) country name for the given 2-letter iso code via pycountry

Examples:

>>> get_country_name("de")
"Germany"
>>> get_country_name("xx")
"xx"
>>> get_country_name("gb") == get_country_name("uk")
True  # United Kingdom

Parameters:

Name Type Description Default
code str

Two-letter iso code, case insensitive

required

Returns:

Type Description
str

Either the country name for a valid code or the code as fallback.

Source code in ftmq/util.py
@cache
def get_country_name(code: str) -> str:
    """
    Look up the (english) country name for a 2-letter iso code via
    [pycountry](https://pypi.org/project/pycountry/)

    Examples:
        >>> get_country_name("de")
        "Germany"
        >>> get_country_name("xx")
        "xx"
        >>> get_country_name("gb") == get_country_name("uk")
        True  # United Kingdom

    Args:
        code: Two-letter iso code, case insensitive

    Returns:
        The country name if pycountry knows the code. Otherwise the normalized
            code (lower-cased input if it could not be resolved), or the
            unmodified input if the pycountry lookup raises.
    """
    normalized = get_country_code(code)
    if normalized is None:
        # Not resolvable via the country registry: fall back to the raw code.
        normalized = code.lower()
    try:
        match = pycountry.countries.get(alpha_2=normalized)
        result = normalized if match is None else match.name
    except (LookupError, AttributeError):
        return code
    return result

get_dehydrated_proxy(proxy)

Reduce proxy payload to only include caption property

Parameters:

Name Type Description Default
proxy CE

nomenklatura.entity.CompositeEntity

required

Returns:

Type Description
CE

A nomenklatura.entity.CompositeEntity with only the caption property.

Source code in ftmq/util.py
def get_dehydrated_proxy(proxy: CE) -> CE:
    """
    Build a slimmed-down copy of a proxy that keeps only its caption property

    The id, schema name and datasets are carried over from the input proxy.

    Args:
        proxy: `nomenklatura.entity.CompositeEntity`

    Returns:
        A `nomenklatura.entity.CompositeEntity` with only the caption property.
    """
    payload = {
        "id": proxy.id,
        "schema": proxy.schema.name,
        "properties": get_proxy_caption_property(proxy),
        "datasets": proxy.datasets,
    }
    return make_proxy(payload)

get_featured_proxy(proxy)

Reduce proxy payload to only include featured properties

Parameters:

Name Type Description Default
proxy CE

nomenklatura.entity.CompositeEntity

required

Returns:

Type Description
CE

A nomenklatura.entity.CompositeEntity with only the featured properties for its schema.

Source code in ftmq/util.py
def get_featured_proxy(proxy: CE) -> CE:
    """
    Build a slimmed-down copy of a proxy that keeps only featured properties

    Starts from the [dehydrated][ftmq.util.get_dehydrated_proxy] proxy and adds
    the values of every property listed in `proxy.schema.featured`.

    Args:
        proxy: `nomenklatura.entity.CompositeEntity`

    Returns:
        A `nomenklatura.entity.CompositeEntity` with only the featured
            properties for its schema.
    """
    slim = get_dehydrated_proxy(proxy)
    for name in proxy.schema.featured:
        values = proxy.get(name)
        slim.add(name, values)
    return slim

get_statements(proxy, *datasets)

Get statements from a nomenklatura.entity.CompositeEntity with multiple datasets if needed

Parameters:

Name Type Description Default
proxy CE

nomenklatura.entity.CompositeEntity

required
*datasets str

Any (additional) datasets to create statements for

()

Yields:

Type Description
SGenerator

A generator of nomenklatura.statement.Statement

Source code in ftmq/util.py
def get_statements(proxy: CE, *datasets: str) -> SGenerator:
    """
    Generate statements for a `nomenklatura.entity.CompositeEntity`, once per
    given dataset

    Args:
        proxy: `nomenklatura.entity.CompositeEntity`
        *datasets: Any (additional) datasets to create statements for. If none
            are given, the single dataset name `"default"` is used.

    Yields:
        A generator of `nomenklatura.statement.Statement`
    """
    targets = datasets if datasets else ("default",)
    for name in targets:
        # FIXME (carried over): each statement is round-tripped through a dict
        # so that falsy "target" / "external" values become an explicit False.
        for statement in Statement.from_entity(proxy, name):
            payload = statement.to_dict()
            for flag in ("target", "external"):
                payload[flag] = payload.get(flag) or False
            yield Statement.from_dict(payload)

get_year_from_iso(value)

Extract the year from an ISO date string or datetime object.

Examples:

>>>  get_year_from_iso(None)
None
>>>  get_year_from_iso("2023")
2023
>>>  get_year_from_iso(2020)
2020
>>>  get_year_from_iso(datetime.now())
2024
>>>  get_year_from_iso("2000-01")
2000

Parameters:

Name Type Description Default
value Any

Any input that will be cleaned

required

Returns:

Type Description
int | None

The year or None

Source code in ftmq/util.py
def get_year_from_iso(value: Any) -> int | None:
    """
    Extract the year from an iso date string or `datetime` object.

    The year is taken from the first four characters of the cleaned value.

    Examples:
        >>> get_year_from_iso(None)
        None
        >>> get_year_from_iso("2023")
        2023
        >>> get_year_from_iso(2020)
        2020
        >>> get_year_from_iso(datetime(2024, 1, 1))
        2024
        >>> get_year_from_iso("2000-01")
        2000

    Args:
        value: Any input that will be [cleaned][ftmq.util.clean_string]

    Returns:
        The year or `None`
    """
    text = clean_string(value)
    if not text:
        return None
    head = str(text)[:4]
    try:
        return int(head)
    except ValueError:
        # The leading characters are not an integer, so no year is found.
        return None

join_slug(*parts, prefix=None, sep='-', strict=True, max_len=255)

Create a stable slug from parts with optional validation

Examples:

>>> join_slug("foo", "bar")
"foo-bar"
>>> join_slug("foo", None, "bar")
None
>>> join_slug("foo", None, "bar", strict=False)
"foo-bar"
>>> join_slug("foo", "bar", sep="_")
"foo_bar"
>>> join_slug("a very long thing", max_len=15)
"a-very-5c156cf9"

Parameters:

Name Type Description Default
*parts str | None

Multiple (ordered) parts to compute the slug from

()
prefix str | None

Add a prefix to the slug

None
sep str

Parts separator

'-'
strict bool

Ensure all parts are not None

True
max_len int

Maximum length of the slug. If it exceeds, the returned value will get a computed hash suffix

255

Returns:

Type Description
str | None

The computed slug or None if validation fails

Source code in ftmq/util.py
def join_slug(
    *parts: str | None,
    prefix: str | None = None,
    sep: str = "-",
    strict: bool = True,
    max_len: int = 255,
) -> str | None:
    """
    Build a stable slug out of several parts, optionally validating them

    Examples:
        >>> join_slug("foo", "bar")
        "foo-bar"
        >>> join_slug("foo", None, "bar")
        None
        >>> join_slug("foo", None, "bar", strict=False)
        "foo-bar"
        >>> join_slug("foo", "bar", sep="_")
        "foo_bar"
        >>> join_slug("a very long thing", max_len=15)
        "a-very-5c156cf9"

    Args:
        *parts: Multiple (ordered) parts to compute the slug from
        prefix: Add a prefix to the slug
        sep: Parts separator
        strict: Ensure all parts are not `None` (after slugifying)
        max_len: Maximum length of the slug. If it exceeds, the returned value
            will get a computed hash suffix

    Returns:
        The computed slug or `None` if validation fails or no part is usable
    """
    slugs = [slugify(part, sep=sep) for part in parts]
    usable = [s for s in slugs if s is not None]
    if strict and len(usable) != len(slugs):
        # At least one part slugified to nothing.
        return None
    if not usable:
        return None
    head = slugify(prefix, sep=sep)
    if head is not None:
        usable.insert(0, head)
    joined = sep.join(usable)
    if len(joined) > max_len:
        # Shorten the slug but keep it unique: 8 hash chars of the full slug
        # plus one joining character are reserved at the end.
        # NOTE(review): the hash suffix is always joined with "-", even when
        # `sep` differs — confirm this is intended before changing it, as it
        # affects the stability of already generated slugs.
        digest = make_entity_id(joined)[:8]
        trimmed = joined[: max_len - 9].strip(sep)
        return f"{trimmed}-{digest}"
    return joined

make_fingerprint(value) cached

Create a stable but simplified string or None from input that can be used to generate ids (to mimic fingerprints.generate which is unstable for IDs as its algorithm could change)

Examples:

>>> make_fingerprint("Mrs. Jane Doe")
"doe jane mrs"
>>> make_fingerprint("Mrs. Jane Mrs. Doe")
"doe jane mrs"
>>> make_fingerprint("#")
None
>>> make_fingerprint(" ")
None
>>> make_fingerprint("")
None
>>> make_fingerprint(None)
None

Parameters:

Name Type Description Default
value Any

Any input that will be cleaned

required

Returns:

Type Description
str | None

The simplified string (fingerprint) or None if value is not feasible to fingerprint.

Source code in ftmq/util.py
@lru_cache(1024)
def make_fingerprint(value: Any) -> str | None:
    """
    Derive a stable, simplified string from the input that is suitable for
    generating ids, or `None` (mimics `fingerprints.generate`, which is
    unstable for IDs as its algorithm could change)

    The fingerprint consists of the unique slug tokens of the cleaned name,
    sorted and joined by a single space.

    Examples:
        >>> make_fingerprint("Mrs. Jane Doe")
        "doe jane mrs"
        >>> make_fingerprint("Mrs. Jane Mrs. Doe")
        "doe jane mrs"
        >>> make_fingerprint("#")
        None
        >>> make_fingerprint(" ")
        None
        >>> make_fingerprint("")
        None
        >>> make_fingerprint(None)
        None

    Args:
        value: Any input that will be [cleaned][ftmq.util.clean_name]

    Returns:
        The simplified string (fingerprint) or `None` if value is not feasible
            to fingerprint.
    """
    name = clean_name(value)
    if name is None:
        return None
    # De-duplicate the slug tokens, then sort them for a deterministic order.
    tokens = set(slugify(name).split("-"))
    return " ".join(sorted(tokens))

make_fingerprint_id(*values) cached

Compute a hash id based on values fingerprints

Parameters:

Name Type Description Default
*values Any

Parts to compute id from that will be fingerprinted

()

Returns:

Type Description
str | None

The computed hash id or None if a part's fingerprinted value is None

Source code in ftmq/util.py
@lru_cache(1024)
def make_fingerprint_id(*values: Any) -> str | None:
    """
    Compute a hash id from the fingerprints of the given values

    Args:
        *values: Parts to compute id from that will be
            [fingerprinted][ftmq.util.make_fingerprint]

    Returns:
        The id computed by `make_entity_id` from the fingerprints, which may be
            `None` (NOTE(review): how `None` fingerprints are treated is
            decided by `make_entity_id` — confirm there)
    """
    fingerprints = [make_fingerprint(part) for part in values]
    return make_entity_id(*fingerprints)

make_proxy(data, dataset=None)

Create a nomenklatura.entity.CompositeEntity from a json dict.

Parameters:

Name Type Description Default
data dict[str, Any]

followthemoney data dict that represents entity data.

required
dataset str | Dataset | None

A default dataset

None

Returns:

Type Description
CE

The composite entity proxy

Source code in ftmq/util.py
def make_proxy(data: dict[str, Any], dataset: str | Dataset | None = None) -> CE:
    """
    Build a `nomenklatura.entity.CompositeEntity` from a json dict.

    Note that the `"datasets"` key is popped from `data`, i.e. the passed dict
    is modified.

    Args:
        data: followthemoney data dict that represents entity data.
        dataset: A default dataset. If omitted, the first entry of
            `data["datasets"]` is used, or `DefaultDataset` if there is none.

    Returns:
        The composite entity proxy

    Raises:
        ValidationError: If more than one dataset is involved and the entity
            has no id.
    """
    names = ensure_list(data.pop("datasets", None))
    if dataset is None:
        dataset = make_dataset(names[0]) if names else DefaultDataset
    else:
        if isinstance(dataset, str):
            dataset = make_dataset(dataset)
        # An explicitly given dataset is added to the ones found in the data.
        names.append(dataset.name)
    entity = CompositeEntity(dataset, data)
    if len(names) <= 1:
        return entity
    # With several datasets the entity is rebuilt from per-dataset statements.
    if entity.id is None:
        raise ValidationError("Entity has no ID.")
    return CompositeEntity.from_statements(dataset, get_statements(entity, *names))

make_string_id(*values) cached

Compute a hash id based on values

Parameters:

Name Type Description Default
*values Any

Parts to compute id from that will be cleaned

()

Returns:

Type Description
str | None

The computed hash id or None if a part's cleaned value is None

Source code in ftmq/util.py
@lru_cache(1024)
def make_string_id(*values: Any) -> str | None:
    """
    Compute a hash id from the cleaned names of the given values

    Args:
        *values: Parts to compute id from that will be
            [cleaned][ftmq.util.clean_name]

    Returns:
        The id computed by `make_entity_id` from the cleaned parts, which may
            be `None` (NOTE(review): how `None` parts are treated is decided by
            `make_entity_id` — confirm there)
    """
    cleaned = [clean_name(part) for part in values]
    return make_entity_id(*cleaned)

prop_is_numeric(schema, prop) cached

Indicate if the given property is numeric type

Parameters:

Name Type Description Default
schema Schema

followthemoney schema

required
prop str

Property

required

Returns:

Type Description
bool

False if the property is not numeric type or not found in the schema at all

Source code in ftmq/util.py
@cache
def prop_is_numeric(schema: Schema, prop: str) -> bool:
    """
    Tell whether a property of the given schema is of numeric type

    Args:
        schema: followthemoney schema
        prop: Property

    Returns:
        `True` if the property's type is `registry.number`. `False` if it is
            any other type or not found in the schema at all
    """
    found = schema.get(prop)
    if found is None:
        return False
    return found.type == registry.number

to_numeric(value)

Convert a string value into a primitive numeric dtype (int or float) taking US and DE formatting into account via regex

Examples:

>>> to_numeric("1")
1
>>> to_numeric("1.0")
1
>>> to_numeric("1.1")
1.1
>>> to_numeric("1,101,000")
1101000
>>> to_numeric("1.000,1")
1000.1
>>> to_numeric("foo")
None

Parameters:

Name Type Description Default
value str

The input

required

Returns:

Type Description
float | int | None

The converted number or None if conversion fails

Source code in ftmq/util.py
def to_numeric(value: str) -> float | int | None:
    """
    Convert a string value into a primitive numeric dtype (`int` or `float`)
    taking US and DE formatting into account via regex

    Examples:
        >>> to_numeric("1")
        1
        >>> to_numeric("1.0")
        1
        >>> to_numeric("1.1")
        1.1
        >>> to_numeric("1,101,000")
        1101000
        >>> to_numeric("1.000,1")
        1000.1
        >>> to_numeric("foo")
        None

    Args:
        value: The input

    Returns:
        The converted number or `None` if conversion fails
    """
    value = str(value).strip()
    try:
        value = float(value)
        if int(value) == value:
            return int(value)
        return value
    except ValueError:
        if re.match(NUMERIC_US, value):
            return to_numeric(value.replace(",", ""))
        if re.match(NUMERIC_DE, value):
            return to_numeric(value.replace(".", "").replace(",", "."))