"""
Utility classes and functions for RSMTool file management.
:author: Jeremy Biggs (jbiggs@ets.org)
:author: Anastassia Loukina (aloukina@ets.org)
:author: Nitin Madnani (nmadnani@ets.org)
:organization: ETS
"""
import json
import re
from glob import glob
from os.path import join
from pathlib import Path
from typing import Any, Dict, Union
from .constants import POSSIBLE_EXTENSIONS
def has_files_with_extension(directory: str, ext: str) -> bool:
    """
    Check if the directory has any files with the given extension.

    Parameters
    ----------
    directory : str
        The path to the directory where output is located. The path is
        treated literally: any glob metacharacters it contains (``*``,
        ``?``, ``[``) are escaped before matching.
    ext : str
        The given extension, without the leading period.

    Returns
    -------
    ans : bool
        ``True`` if directory contains files with given extension,
        else ``False``.
    """
    # Imported locally because the module-level import only brings in ``glob``.
    from glob import escape

    # Escape only the directory part; otherwise a directory such as
    # ``run[1]`` would be read as a character class and never match itself.
    pattern = join(escape(directory), f"*.{ext}")
    return len(glob(pattern)) > 0
def get_output_directory_extension(directory: str, experiment_id: str) -> str:
    """
    Check output directory to determine what file extensions exist.

    Each extension in ``POSSIBLE_EXTENSIONS`` is looked up in the directory.
    If more than one of them is present, a ``ValueError`` is raised.
    If exactly one is present, it is returned. If none is present,
    "csv" is returned by default.

    Possible extensions include: "csv", "tsv", and "xlsx". Files in the
    directory with none of these extensions are ignored.

    Parameters
    ----------
    directory : str
        The path to the directory where output is located.
    experiment_id : str
        The ID of the experiment. Used only in the error message.

    Returns
    -------
    extension : str
        The extension that output files in this directory end with. One of
        {``"csv"``, ``"tsv"``, ``"xlsx"``}.

    Raises
    ------
    ValueError
        If files with more than one of the possible extensions are found
        in the directory.
    """
    extensions_identified = {
        ext for ext in POSSIBLE_EXTENSIONS if has_files_with_extension(directory, ext)
    }

    if len(extensions_identified) > 1:
        # Sort before joining: set iteration order is not stable across
        # runs, which would make the message differ from run to run.
        raise ValueError(
            f"Some of the files in the experiment output directory (`{directory}`) "
            f"for `{experiment_id}` have different extensions. All files in this "
            f"directory must have the same extension. The following extensions "
            f"were identified : {', '.join(sorted(extensions_identified))}"
        )

    if extensions_identified:
        # Exactly one extension was found; unpack it from the set.
        (extension,) = extensions_identified
        return extension

    # Nothing matched any of the possible extensions: fall back to the default.
    return "csv"