# Source code for rsmtool.utils.notebook

"""
Utility functions for use in RSMTool sections/notebooks.

:author: Jeremy Biggs (jbiggs@ets.org)
:author: Anastassia Loukina (aloukina@ets.org)
:author: Nitin Madnani (nmadnani@ets.org)

:organization: ETS
"""

import re
from math import ceil
from os.path import exists, isabs, relpath
from pathlib import Path
from string import Template
from textwrap import wrap
from typing import Dict, List, Optional, Tuple, Union

from IPython.display import HTML, display

from .constants import INTERMEDIATE_FILES_TO_DESCRIPTIONS

# HTML template for a single two-column table row. It has three positional
# ``{}`` fields, filled via ``str.format`` in this order:
#   1. the ``href`` target of the download link,
#   2. the visible text of the link,
#   3. the contents of the second (description) cell.
INTERMEDIATE_TABLE_ROW_STRING = """
<tr>
    <td style="text-align:left;">
        <a href="{}" style="text-decoration: none;" download>{}</a>
    </td>
    <td style="text-align:left;">{}</td>
</tr>
"""


def float_format_func(num: float, prec: int = 3, scientific: bool = False) -> str:
    """
    Format given float to the specified precision as a string.

    Parameters
    ----------
    num : float
        The floating point number to format.
    prec: int
        The number of decimal places to use when displaying the number.
        Defaults to 3.
    scientific: bool
        Whether to display the number in scientific notation if the rounded
        version is zero at the requested precision (e.g., "0.000" or
        "-0.000" for ``prec=3``, "0.00" for ``prec=2``).
        Defaults to ``False``.

    Returns
    -------
    ans : str
        The formatted string representing the given number.
    """
    ans = f"{num:.{prec}f}"
    # Check the rounded text numerically rather than against the literal
    # "0.000": that way the fallback also triggers for precisions other than
    # 3 and for small negative values, which round to "-0.000".
    if scientific and float(ans) == 0:
        ans = f"{num:.{prec}e}"
    return ans


def int_or_float_format_func(num: Union[int, float], prec: int = 3) -> str:
    """
    Identify whether the number is float or integer.

    When displaying integers, use no decimal. For a float, round to the
    specified number of decimal places and convert to a string.

    Parameters
    ----------
    num : Union[int, float]
        The number to format and display. Both true integers and floats
        holding an integral value (e.g., ``3.0``) are shown without decimals.
    prec : int
        The number of decimal places to display if ``num`` is a float with a
        fractional part.
        Defaults to 3.

    Returns
    -------
    ans : str
        The formatted string representing the given number.
    """
    # ``float.is_integer(num)`` is an unbound-method call that raises
    # ``TypeError`` when ``num`` is an actual ``int``, so handle ints first and
    # convert everything else to ``float`` before asking whether it is whole.
    if isinstance(num, int) or float(num).is_integer():
        ans = f"{int(num)}"
    else:
        ans = float_format_func(num, prec=prec)
    return ans


def custom_highlighter(
    num: float,
    low: float = 0,
    high: float = 1,
    prec: int = 3,
    absolute: bool = False,
    span_class: str = "bold",
) -> str:
    """
    Format a float, wrapping it in an HTML <span> when it is out of range.

    The number is always formatted to ``prec`` decimal places. If the value
    (or its absolute value, when ``absolute`` is set) is strictly below
    ``low`` or strictly above ``high``, the formatted text is wrapped in
    ``<span class="highlight_<span_class>">``; otherwise the plain formatted
    text is returned.

    Parameters
    ----------
    num : float
        The floating point number to format.
    low : float
        The number is wrapped in an HTML span if it is below this value.
        Defaults to 0.
    high : float
        The number is wrapped in an HTML span if it is above this value.
        Defaults to 1.
    prec : int
        The number of decimal places to display. Defaults to 3.
    absolute : bool
        If ``True``, compare the absolute value of ``num`` against the
        bounds. The displayed text is still based on ``num`` itself.
        Defaults to ``False``.
    span_class : str
        Suffix of the CSS class used for the span, e.g. ``"bold"`` or
        ``"color"``.
        Defaults to ``"bold"``.

    Returns
    -------
    ans : str
        The plain or HTML string representing the given number.
    """
    # only the comparison uses the absolute value; the text shows ``num``
    if absolute:
        compared_value = abs(num)
    else:
        compared_value = num

    formatted = float_format_func(num, prec=prec)

    out_of_range = compared_value < low or compared_value > high
    if not out_of_range:
        return formatted
    return f'<span class="highlight_{span_class}">{formatted}</span>'


def bold_highlighter(
    num: float, low: float = 0, high: float = 1, prec: int = 3, absolute: bool = False
) -> str:
    """
    Call ``custom_highlighter()`` with the span class fixed to "bold".

    Parameters
    ----------
    num : float
        The floating point number to format.
    low : float
        The number is wrapped in an HTML span if it is below this value.
        Defaults to 0.
    high : float
        The number is wrapped in an HTML span if it is above this value.
        Defaults to 1.
    prec : int
        The number of decimal places to display.
        Defaults to 3.
    absolute : bool
        If ``True``, use the absolute value of ``num`` for the comparison.
        Defaults to ``False``.

    Returns
    -------
    ans : str
        The plain or HTML string produced by ``custom_highlighter()`` with
        the "bold" class.
    """
    return custom_highlighter(
        num,
        low=low,
        high=high,
        prec=prec,
        absolute=absolute,
        span_class="bold",
    )


def color_highlighter(
    num: float, low: float = 0, high: float = 1, prec: int = 3, absolute: bool = False
) -> str:
    """
    Call ``custom_highlighter()`` with the span class fixed to "color".

    Parameters
    ----------
    num : float
        The floating point number to format.
    low : float
        The number is wrapped in an HTML span if it is below this value.
        Defaults to 0.
    high : float
        The number is wrapped in an HTML span if it is above this value.
        Defaults to 1.
    prec : int
        The number of decimal places to display.
        Defaults to 3.
    absolute : bool
        If ``True``, use the absolute value of ``num`` for the comparison.
        Defaults to ``False``.

    Returns
    -------
    ans : str
        The plain or HTML string produced by ``custom_highlighter()`` with
        the "color" class.
    """
    return custom_highlighter(
        num,
        low=low,
        high=high,
        prec=prec,
        absolute=absolute,
        span_class="color",
    )


def compute_subgroup_plot_params(
    group_names: List[str], num_plots: int
) -> Tuple[int, int, int, int, List[str]]:
    """
    Compute subgroup plot and figure parameters.

    Group names are wrapped at 20 characters. With at most six groups the
    plots are laid out in two columns and the figure width scales with the
    number of groups; with more groups each plot gets its own row in a
    single column of fixed width 10. Each plot row is 4 units tall when no
    name needed wrapping and 6 units tall otherwise.

    Parameters
    ----------
    group_names : List[str]
        A list of subgroup names for plots.
    num_plots : int
        The number of plots to compute.

    Returns
    -------
    figure_width : int
        The width of the figure.
    figure_height : int
        The height of the figure.
    num_rows : int
        The number of rows for the plots.
    num_columns : int
        The number of columns for the plots.
    wrapped_group_names : List[str]
        The group names with long names broken over multiple lines.
    """
    wrapped_group_names = []
    for name in group_names:
        wrapped_group_names.append("\n".join(wrap(str(name), 20)))

    # taller plots are needed as soon as any name changed when wrapping
    if wrapped_group_names == group_names:
        plot_height = 4
    else:
        plot_height = 6

    num_groups = len(group_names)

    # many groups: one plot per row in a single fixed-width column
    if num_groups > 6:
        return (10, plot_height * num_plots, num_plots, 1, wrapped_group_names)

    # few groups: two plots per row, width proportional to the group count
    num_columns = 2
    num_rows = ceil(num_plots / num_columns)
    return (
        num_columns * num_groups,
        plot_height * num_rows,
        num_rows,
        num_columns,
        wrapped_group_names,
    )



# [docs]
def get_thumbnail_as_html(
    path_to_image: str, image_id: int, path_to_thumbnail: Optional[str] = None
) -> str:
    """
    Generate HTML for a clickable thumbnail of given image.

    The returned HTML consists of an ``<img>`` tag followed by a ``<style>``
    block (fixed 150px-wide bordered images) and a ``<script>`` block that
    defines ``getPicture()``, which opens a path in a new window. The
    ``<img>`` tag uses the image path as its ``src`` and passes the thumbnail
    path to ``getPicture()`` on click.

    Parameters
    ----------
    path_to_image : str
        The absolute or relative path to the image. If an absolute path is
        provided, it will be converted to a relative path.
    image_id : int
        The id of the <img> tag in the HTML. This must be unique for each <img> tag.
    path_to_thumbnail : Optional[str]
        Path to a separate thumbnail image; it is always converted to a
        relative path. If ``None``, the image path is used in its place.
        Defaults to ``None``.

    Returns
    -------
    image : str
        The HTML string generated for the image.

    Raises
    ------
    FileNotFoundError
        If the image file, or the thumbnail file when one is given,
        cannot be located.
    """
    missing_file_template = "The file `{}` could not be located."

    if not exists(path_to_image):
        raise FileNotFoundError(missing_file_template.format(path_to_image))

    # absolute image paths are made relative; relative ones are used verbatim
    rel_image_path = relpath(path_to_image) if isabs(path_to_image) else path_to_image

    # an explicit thumbnail must exist and is always made relative;
    # without one, fall back to the image path
    if path_to_thumbnail is not None:
        if not exists(path_to_thumbnail):
            raise FileNotFoundError(missing_file_template.format(path_to_thumbnail))
        rel_thumbnail_path = relpath(path_to_thumbnail)
    else:
        rel_thumbnail_path = rel_image_path

    # NOTE(review): ``src`` shows the image path while the click handler opens
    # the thumbnail path -- this looks inverted relative to the parameter
    # names; confirm against callers before changing.
    img_tag = (
        f"""<img id='{image_id}' src='{rel_image_path}' """
        f"""onclick='getPicture("{rel_thumbnail_path}")' """
        f"""title="Click to enlarge"></img>"""
    )

    # styling applied to the thumbnail
    css_block = """
    <style>
    img {
        border: 1px solid #ddd;
        border-radius: 4px;
        padding: 5px;
        width: 150px;
        cursor: pointer;
    }
    </style>
    """

    # click handler: open the given path in a new window
    js_block = """
    <script>
    function getPicture(picpath) {
        window.open(picpath, 'Image', resizable=1);
    };
    </script>"""

    # tag first, then the style and script it relies on
    return "".join([img_tag, css_block, js_block])




# [docs]
def show_thumbnail(
    path_to_image: str, image_id: int, path_to_thumbnail: Optional[str] = None
) -> None:
    """
    Display the HTML for an image thumbnail in a Jupyter notebook.

    Builds the thumbnail HTML with ``get_thumbnail_as_html()`` and renders
    it through IPython's ``display(HTML(...))``.

    Parameters
    ----------
    path_to_image : str
        The absolute or relative path to the image. If an absolute path is
        provided, it will be converted to a relative path.
    image_id : int
        The id of the <img> tag in the HTML. This must be unique for each <img> tag.
    path_to_thumbnail : Optional[str]
        If you would like to use a different thumbnail image, specify the path
        to the thumbnail.
        Defaults to ``None``.
    """
    thumbnail_html = get_thumbnail_as_html(path_to_image, image_id, path_to_thumbnail)
    display(HTML(thumbnail_html))



def get_files_as_html(
    output_dir: str,
    experiment_id: str,
    file_format: str,
    replace_dict: Optional[Dict[str, str]] = None,
) -> str:
    """
    Generate an HTML table listing each output file in given directory.

    Every file in ``output_dir`` with the extension ``file_format`` becomes
    one table row holding a download link and a description. Rows are sorted
    by file name with the ``<experiment_id>_`` prefix removed.

    Optionally pass a replacement dictionary to use more descriptive
    titles for the file names.

    Parameters
    ----------
    output_dir : str
        The output directory.
    experiment_id : str
        The experiment ID. Only a leading ``<experiment_id>_`` is stripped
        from file names; occurrences elsewhere in the name are kept.
    file_format : str
        The format (file extension, without the dot) of the output files.
    replace_dict : Optional[Dict[str, str]]
        A dictionary which maps file names to descriptions. For names
        containing ``_by_<subgroup>``, the key is looked up with the subgroup
        replaced by ``ZZZ`` and any ``ZZZ`` in the description is replaced
        with the actual subgroup.
        Defaults to ``None``, which is treated as an empty dictionary.

    Returns
    -------
    html_string : str
        HTML string with file descriptions and links.
    """
    descriptions = {} if replace_dict is None else replace_dict

    output_path = Path(output_dir)
    parent_dir = output_path.parent

    prefix = f"{experiment_id}_"
    # compiled once since it is applied to every file name
    subgroup_pattern = re.compile(r"_by_([^\._]+)")

    # pair every matching file with its bare name, i.e. the stem without the
    # experiment id prefix; strip only a true prefix so that the same text
    # appearing later in the name is not removed as well
    named_files = []
    for file in output_path.glob(f"*.{file_format}"):
        stem = file.stem
        bare_name = stem[len(prefix) :] if stem.startswith(prefix) else stem
        named_files.append((bare_name, file))

    # initialize list to hold the generated HTML strings
    html_strings = [
        """
        <tr>
            <th style="text-align:left;">Filename</th>
            <th style="text-align:left;">Description</th>
        </tr>
        """
    ]

    # iterate over the files, ordered by bare name, and generate the rows
    for filename, file in sorted(named_files):
        # get the path of the file relative to the parent of the "output"
        # directory; these will be the targets of our links
        relative_file = ".." / file.relative_to(parent_dir)

        # if the name of the file contains "_by_<subgroup>", replace
        # the subgroup with "ZZZ" to get the replacement key; otherwise,
        # use the filename as the key; if neither form of the key is
        # found in the dictionary, use the filename itself as the description
        try:
            if m := subgroup_pattern.search(filename):
                subgroup = m.group(1)
                replacement_key = filename.replace(f"_by_{subgroup}", "_by_ZZZ")
                # replace the "ZZZ" with the actual subgroup in the description
                descriptive_name = descriptions[replacement_key].replace("ZZZ", subgroup)
            else:
                descriptive_name = descriptions[filename]
        except KeyError:
            # no pre-defined description: title-case the bare file name
            descriptive_name = " ".join(filename.split("_")).title()

        # generate a table row string for this file
        html_strings.append(
            INTERMEDIATE_TABLE_ROW_STRING.format(relative_file, filename, descriptive_name)
        )

    # combine the HTML table row strings into a single HTML table
    table_rows = "\n".join(html_strings)
    return f"<table>{table_rows}</table>"


def show_files(context: str, output_dir: str, experiment_id: str, file_format: str) -> None:
    """
    Show files for given context and directory as a table in a Jupyter notebook.

    The file descriptions are taken from the entry for ``context`` in
    ``INTERMEDIATE_FILES_TO_DESCRIPTIONS``; the table itself is built by
    ``get_files_as_html()`` and rendered with ``display(HTML(...))``.

    Parameters
    ----------
    context: str
        The tool context: one of {``"rsmtool"``, ``"rsmeval"``, ``"rsmsummarize"``}.
    output_dir : str
        The output directory.
    experiment_id : str
        The experiment ID.
    file_format : str
        The format of the output files.
    """
    # descriptions specific to the tool that produced the files
    context_descriptions = INTERMEDIATE_FILES_TO_DESCRIPTIONS[context]
    files_table_html = get_files_as_html(
        output_dir, experiment_id, file_format, replace_dict=context_descriptions
    )
    display(HTML(files_table_html))