Source code for finesse.utilities.text

"""Text utilities."""

from __future__ import annotations
from collections.abc import Iterable
import spellchecker
from quantiphy import Quantity as _Quantity


def ngettext(n, fsingle, fplural, sub=True):
    """Pick the singular or plural message template according to `n`.

    A cut-down take on the standard library's :func:`gettext.ngettext`: the singular
    template is used only when `n` equals 1, the plural template in every other case.

    Parameters
    ----------
    n : int
        The count that selects which template is returned.

    fsingle, fplural : str
        Templates for the singular and plural case respectively.

    sub : bool, optional
        Whether to format `n` into the chosen template. Defaults to `True`.

    Returns
    -------
    str
        The chosen template, with `{n}` replaced by `n` when `sub` is true.

    Examples
    --------
    >>> ngettext(1, "{n} item", "{n} items")
    '1 item'

    >>> ngettext(5, "{n} item", "{n} items")
    '5 items'

    A template without `{n}` is returned unchanged:

    >>> ngettext(5, "item", "items")
    'items'

    With `sub=False` no substitution takes place:

    >>> ngettext(5, "{n} item", "{n} items", sub=False)
    '{n} items'
    """
    template = fsingle if n == 1 else fplural
    if not sub:
        return template
    return template.format(n=n)


def option_list(sequence, final_sep="or", quotechar=None, sort=False, prefix=None):
    """Build a list from `sequence` with commas and a final "or".

    As in Python's error messages (e.g. "'func' missing 3 required positional
    arguments: 'a', 'b', and 'c'"), this function adds an Oxford comma for sequences of
    length > 2.

    Parameters
    ----------
    sequence : iterable
        The options to create a list with. Items that are not strings are converted
        with :class:`str`.

    final_sep : str, optional
        The final separator when `sequence` has more than one item. Defaults to `or`.

    quotechar : str, optional
        Quote the items in `sequence` with this character. Defaults to no quotes.

    sort : bool, optional
        Sort the items of `sequence` (using their natural ordering, before they are
        converted to strings). Defaults to false.

    prefix : str, optional
        Concatenates the prefix with all items in `sequence`. Defaults to no prefix.

    Returns
    -------
    str
        The formatted list; an empty string when `sequence` is empty.

    Examples
    --------
    >>> option_list(["a", "b", "c"])
    'a, b, or c'

    >>> option_list(["a", "b"], final_sep="and", quotechar="'")
    "'a' and 'b'"
    """
    items = sorted(sequence) if sort else list(sequence)

    # Stringify up front so that non-string options behave the same whether or not
    # `quotechar`/`prefix` are given (`str.join` and `prefix + item` need strings).
    items = [str(item) for item in items]

    if prefix:
        items = [prefix + item for item in items]

    if quotechar:
        items = [f"{quotechar}{item}{quotechar}" for item in items]

    if len(items) <= 1:
        return "".join(items)
    if len(items) == 2:
        return f"{items[0]} {final_sep} {items[1]}"

    # Three or more items: comma-separate everything and attach the final separator to
    # the last item, which yields the Oxford comma ("a, b, or c").
    *head, last = items
    return ", ".join([*head, f"{final_sep} {last}"])


def format_section(header, body, ruler=True, ruler_char="="):
    """Lay out `header` and `body` as a titled text section.

    The header always comes first on its own line. When `ruler` is true, a line made of
    `ruler_char` repeated once per header character follows it. A non-empty `body` is
    appended after a blank line and terminated with a newline.
    """
    pieces = [f"{header}\n"]

    if ruler:
        underline = ruler_char * len(header)
        pieces.append(f"{underline}\n")

    if body:
        pieces.append(f"\n{body}\n")

    return "".join(pieces)


def format_bullet_list(items, indent=4, bullet_char="-"):
    """Render `items` as a bullet list, one item per line.

    Each line consists of `indent` spaces, `bullet_char`, a single space and the item.
    Lines are joined with newlines; there is no trailing newline.
    """
    # The leading part is the same for every line, so build it once.
    lead = f"{' ' * indent}{bullet_char} "
    return "\n".join(f"{lead}{entry}" for entry in items)


def add_linenos(linenos, lines):
    """Add line numbers to the start of lines.

    Line numbers are right-aligned in a column wide enough for the longest one, and
    separated from the line text by ``": "``.

    Parameters
    ----------
    linenos : iterable of int
        The line numbers, in the same order as `lines`.

    lines : iterable of str
        The lines.

    Returns
    -------
    list of str
        The lines with prepended line numbers. Empty when `linenos` or `lines` is
        empty; if the two differ in length, the result is as long as the shorter one.

    Examples
    --------
    >>> add_linenos([9, 10], ["a", "b"])
    [' 9: a', '10: b']
    """
    # Materialise once: `linenos` is traversed twice (width computation and pairing),
    # which would silently produce nothing for a one-shot iterator.
    linenos = list(linenos)

    # Use as many columns as required to fit the largest line number. `default` keeps
    # the empty case from raising ValueError; the width is unused then anyway.
    wlinenocol = max((len(str(lineno)) for lineno in linenos), default=0)
    return [f"{lineno:>{wlinenocol}}: {line}" for lineno, line in zip(linenos, lines)]


def stringify(item):
    """Return a compact string form of `item`, descending into lists and tuples.

    Lists and tuples are both rendered as ``[a, b, ...]`` with each element stringified
    in turn; anything else is passed through :class:`str`. Handy where overriding an
    object's ``__repr__`` to get a short representation is impractical or impossible.
    """
    if not isinstance(item, (list, tuple)):
        return str(item)

    inner = ", ".join(map(stringify, item))
    return f"[{inner}]"


def stringify_graph_gml(graph):
    """Serialise the given NetworkX graph to a GML string.

    The graph is written with :func:`networkx.write_gml` into an in-memory byte buffer,
    using :func:`stringify` to convert values, and the buffer contents are decoded as
    UTF-8.
    """
    import io
    import networkx

    buffer = io.BytesIO()
    networkx.write_gml(graph, buffer, stringify)

    # getvalue() yields the whole buffer irrespective of the current stream position.
    return buffer.getvalue().decode("utf-8")


def scale_si(number, units=None):
    """Render `number` as a string with an SI scale factor and, optionally, units.

    The value is wrapped in a quantiphy ``Quantity`` and that object's string form is
    returned.

    Examples
    --------
    >>> scale_si(123.45e-6)
    '123.45u'
    >>> scale_si(370e-6, units="m")
    '370 um'
    """
    quantity = _Quantity(number, units=units)
    return str(quantity)


def get_close_matches(
    word: str,
    options: Iterable[str],
    edit_distance: int = 2,
    case_sensitive: bool = True,
) -> Iterable[str] | None:
    """Find entries of `options` that resemble `word`.

    Thin wrapper around the py-spellchecker module: a checker without a built-in
    dictionary is created, `options` is loaded as its only vocabulary, and the checker
    is asked for candidates for `word` based on the 'Levenshtein distance'.

    Parameters
    ----------
    word : str
        The word to look up.
    options : Iterable[str]
        The words that matches are selected from.
    edit_distance : int, optional
        Distance setting handed to the checker, by default 2. See
        https://en.wikipedia.org/wiki/Levenshtein_distance
    case_sensitive : bool, optional
        Whether characters differing only in case count as different, by default True

    Returns
    -------
    Iterable[str] | None
        The candidates reported by the checker with `word` itself removed, or `None`
        when the checker reports no candidates.
    """
    checker = spellchecker.SpellChecker(
        language=None, case_sensitive=case_sensitive, distance=edit_distance
    )
    checker.word_frequency.load_words(options)

    matches = checker.candidates(word)
    if matches is None:
        return None

    # Under some conditions the checker hands back the queried word itself; that is
    # not a useful suggestion, so drop it.
    matches -= {word}
    return matches