# Source code for rsmtool.convert_feature_json

"""
Convert older feature files in JSON to CSV/TSV/XLSX.

:author: Anastassia Loukina (aloukina@ets.org)
:author: Nitin Madnani (nmadnani@ets.org)
:author: Jeremy Biggs (jbiggs@ets.org)

:organization: ETS
"""

import argparse
import json
import os
from os.path import splitext

import pandas as pd


def convert_feature_json_file(json_file: str, output_file: str, delete: bool = False) -> None:
    """
    Convert given feature JSON file into tabular format.

    The specific format is inferred by the extension of the output file.

    Parameters
    ----------
    json_file : str
        Path to feature JSON file to be converted.
    output_file : str
        Path to CSV/TSV/XLSX output file.
    delete : bool
        Whether to delete the original file after conversion.
        Defaults to ``False``.

    Raises
    ------
    RuntimeError
        If the given input file is not a valid feature JSON file, i.e. its
        top level is not a JSON object whose only key is ``"features"``.
    RuntimeError
        If the output file has an unsupported extension.
    """
    # read through a context manager so the file handle is always closed,
    # even when the contents fail to parse
    with open(json_file, "r") as json_fh:
        json_dict = json.load(json_fh)

    # make sure the input file is a valid feature JSON file; the isinstance
    # check keeps non-object JSON (lists, scalars) from surfacing as an
    # AttributeError instead of the documented RuntimeError
    if not isinstance(json_dict, dict) or list(json_dict) != ["features"]:
        raise RuntimeError(f"{json_file} is not a valid feature JSON file")

    # convert to tabular format
    df_feature = pd.DataFrame(json_dict["features"])

    # make sure the output file is in a supported format
    output_extension = splitext(output_file)[1].lower()
    if output_extension not in [".csv", ".tsv", ".xlsx"]:
        raise RuntimeError(
            f"The output file {output_file} has an unsupported extension. "
            f"It must be a CSV/TSV/XLSX file. RSMTool no longer supports "
            f".xls files."
        )

    if output_extension == ".csv":
        df_feature.to_csv(output_file, index=False)
    elif output_extension == ".tsv":
        df_feature.to_csv(output_file, sep="\t", index=False)
    elif output_extension == ".xlsx":
        df_feature.to_excel(output_file, sheet_name="features", index=False)

    # only remove the original once the converted file has been written
    if delete:
        os.unlink(json_file)


def main() -> None:
    """
    Run the ``convert_feature_json`` command line utility.

    Parses the two positional paths (input JSON file, output tabular file)
    and the optional ``--delete`` flag from the command line, then passes
    them on to ``convert_feature_json_file``.
    """
    cli = argparse.ArgumentParser(prog="convert_feature_json")

    # positional arguments: where to read from and where to write to
    cli.add_argument(
        "json_file",
        help="The feature JSON file to convert to tabular format.",
    )
    cli.add_argument(
        "output_file",
        help="The output file containing the features in tabular format.",
    )

    # optional switch: off unless given explicitly on the command line
    cli.add_argument(
        "--delete",
        action="store_true",
        default=False,
        required=False,
        help="Delete original JSON file after conversion.",
    )

    options = cli.parse_args()
    convert_feature_json_file(
        options.json_file,
        options.output_file,
        delete=options.delete,
    )


if __name__ == "__main__":
    main()