Spaces:

Dhruv-Ty
/

CRAX

Sleeping

App Files Files Community

CRAX / benchmark /utils.py

Dhruv-Ty

initial commit

cb3a670 11 days ago

raw

history blame contribute delete

2.43 kB

	import os
	import json
	from typing import Dict, List


	def load_eurorad_dataset(
	dataset_path: str,
	section: str = "any",
	as_dict: bool = False,
	filter_by_caption: List[str] = [
	"xray",
	"x-ray",
	"x ray",
	"ray",
	"xr",
	"radiograph",
	"radiogram",
	"plain film",
	],
	) -> List[Dict] \| Dict[str, Dict]:
	"""
	Load a dataset from a JSON file.

	Args:
	dataset_path (str): Path to the JSON dataset file.
	section (str, optional): Section of the dataset to load. Defaults to "any".
	as_dict (bool, optional): Whether to return data as dict. Defaults to False.
	filter_by_caption (List[str], optional): List of strings to filter cases by caption content. Defaults to [].

	Returns:
	List[Dict] \| Dict[str, Dict]: The loaded dataset as a list of dictionaries or dict if as_dict=True.

	Raises:
	FileNotFoundError: If dataset_path does not exist
	json.JSONDecodeError: If file is not valid JSON
	"""

	with open(dataset_path, "r", encoding="utf-8") as file:
	data = json.load(file)

	if filter_by_caption:
	filtered_data = {}
	for case_id, case in data.items():
	if any(
	any(x in subfig["caption"].lower() for x in filter_by_caption)
	for figure in case["figures"]
	for subfig in figure["subfigures"]
	) or any(x in case["image_finding"].lower() for x in filter_by_caption):
	filtered_data[case_id] = case
	data = filtered_data

	if section != "any":
	section = section.strip().lower()
	if not as_dict:
	data = [
	item for item in data.values() if item.get("section", "").strip().lower() == section
	]
	else:
	data = {
	k: v for k, v in data.items() if v.get("section", "").strip().lower() == section
	}

	elif not as_dict:
	data = list(data.values())

	return data


	def save_dataset(dataset: Dict \| List[Dict], dataset_path: str):
	"""
	Save a dataset to a JSON file.

	Args:
	dataset (Dict \| List[Dict]): The dataset to save as a dictionary or list of dictionaries.
	dataset_path (str): Path where the JSON dataset file will be saved.
	"""
	with open(dataset_path, "w", encoding="utf-8") as file:
	json.dump(dataset, file)