File size: 3,427 Bytes
d5f5654
2a50df2
8287176
 
d5f5654
8858793
d5f5654
8287176
d5f5654
ddf094a
 
 
299be58
ddf094a
299be58
ddf094a
 
299be58
ddf094a
d5f5654
8287176
d5f5654
ddf094a
 
299be58
ddf094a
 
299be58
ddf094a
 
 
299be58
ddf094a
d5f5654
8287176
d5f5654
ddf094a
 
 
 
 
299be58
ddf094a
 
 
 
 
8287176
 
 
299be58
8287176
299be58
ddf094a
 
 
299be58
ddf094a
 
 
 
 
299be58
47d2223
 
e7bc9a5
 
47d2223
 
 
299be58
47d2223
e7bc9a5
 
 
 
 
299be58
 
e7bc9a5
 
47d2223
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import json
import traceback
import mlcroissant as mlc
import func_timeout

# Upper bound, in seconds, on how long validate_records() waits for the first
# record of each record set before reporting a timeout (5 minutes).
WAIT_TIME = 5 * 60  # seconds

def validate_json(file_path):
    """Check that ``file_path`` holds well-formed JSON and parse it.

    The file is decoded explicitly as UTF-8 instead of with the platform's
    default text encoding, so the outcome does not depend on the locale of
    the machine running the check.

    Args:
        file_path: Path of the file to read.

    Returns:
        A ``(passed, message, json_data)`` tuple. ``passed`` is ``True`` and
        ``json_data`` is the parsed document when the file parses; otherwise
        ``passed`` is ``False``, ``message`` describes the problem and
        ``json_data`` is ``None``. The function never raises: every failure
        is reported through the returned tuple.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            json_data = json.load(f)
    except json.JSONDecodeError as e:
        # The content was readable but is not syntactically valid JSON.
        return False, f"Invalid JSON format: {e!s}", None
    except Exception as e:
        # Anything else (missing file, permissions, undecodable bytes, ...)
        # is reported as a read error so callers always get a tuple back.
        return False, f"Error reading file: {e!s}", None
    return True, "The file is valid JSON.", json_data

def validate_croissant(json_data):
    """Check that a parsed JSON-LD document loads as a Croissant dataset.

    Args:
        json_data: The parsed JSON-LD document, passed to ``mlc.Dataset``
            as its ``jsonld`` argument.

    Returns:
        A ``(passed, message)`` tuple. On failure the message contains the
        exception text followed by the formatted traceback.
    """
    try:
        # Only construction matters here; the resulting object is not needed.
        mlc.Dataset(jsonld=json_data)
    except mlc.ValidationError as err:
        return False, f"Validation failed: {err!s}\n\n{traceback.format_exc()}"
    except Exception as err:
        return (
            False,
            f"Unexpected error during validation: {err!s}\n\n{traceback.format_exc()}",
        )
    return True, "The dataset passes Croissant validation."

def validate_records(json_data):
    """Check that each record set can yield a first record in time.

    For every record set declared in the dataset's metadata, the first
    record is requested under a ``WAIT_TIME``-second timeout. Checking stops
    at the first record set that times out or raises.

    Args:
        json_data: The parsed JSON-LD document, passed to ``mlc.Dataset``
            as its ``jsonld`` argument.

    Returns:
        A ``(passed, message)`` tuple. On success the message has one line
        per validated record set (or a note that there were none). On
        failure it names the offending record set and, for errors other
        than a timeout, includes the formatted traceback.
    """
    try:
        dataset = mlc.Dataset(jsonld=json_data)
        record_sets = dataset.metadata.record_sets

        if not record_sets:
            return True, "No record sets found to validate."

        results = []

        for record_set in record_sets:
            try:
                records = dataset.records(record_set=record_set.uuid)
                # Pull only the first record; that is enough to show the
                # record set can be generated without reading all of it.
                # NOTE(review): a record set that yields nothing presumably
                # surfaces below as a StopIteration failure - confirm that
                # this is the intended outcome for empty record sets.
                func_timeout.func_timeout(WAIT_TIME, lambda: next(iter(records)))
                results.append(f"Record set '{record_set.uuid}' passed validation.")
            except func_timeout.exceptions.FunctionTimedOut:
                # Report the configured limit rather than a hard-coded
                # number so the message stays right if WAIT_TIME changes.
                error_message = (
                    f"Record set '{record_set.uuid}' generation took too long "
                    f"(>{WAIT_TIME}s)"
                )
                return False, error_message
            except Exception as e:
                error_details = traceback.format_exc()
                error_message = f"Record set '{record_set.uuid}' failed: {str(e)}\n\n{error_details}"
                return False, error_message

        return True, "\n".join(results)
    except Exception as e:
        # Failures outside the per-record-set loop (e.g. while building the
        # dataset) are reported rather than propagated.
        error_details = traceback.format_exc()
        error_message = f"Unexpected error during records validation: {str(e)}\n\n{error_details}"
        return False, error_message

def generate_validation_report(filename, json_data, results):
    """Build a markdown report summarising a validation run.

    Args:
        filename: Name of the validated file, echoed in the report header.
        json_data: The parsed JSON-LD document; it is dumped with
            ``indent=2`` inside a fenced ``json`` block at the end.
        results: Iterable of ``(test_name, passed, message)`` tuples, one
            per validation step, rendered in the given order.

    Returns:
        The full report as a single newline-joined string.
    """
    report = [
        "# CROISSANT VALIDATION REPORT",
        "=" * 80,
        "## VALIDATION RESULTS",
        "-" * 80,
        f"Starting validation for file: {filename}",
    ]

    # One section per validation step: heading, pass/fail mark, details.
    for test_name, passed, message in results:
        report.append(f"### {test_name}")
        # U+2713 CHECK MARK / U+2717 BALLOT X. Written as escapes so the
        # markers survive if this source file is ever re-encoded; the
        # previous literals were mis-decoded UTF-8 and rendered as garbage.
        report.append("\u2713" if passed else "\u2717")
        # Strip surrounding whitespace so messages ending in newlines do
        # not leave blank lines in the report.
        report.append(message.strip())

    # Append the validated document itself for reference.
    report.extend(
        [
            "## JSON-LD REFERENCE",
            "=" * 80,
            "```json",
            json.dumps(json_data, indent=2),
            "```",
        ]
    )

    return "\n".join(report)