File size: 9,738 Bytes
1f855fb
f6a9c5a
 
1f855fb
f6a9c5a
1f855fb
f6a9c5a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1f855fb
f6a9c5a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1f855fb
 
f6a9c5a
 
 
 
 
 
 
 
 
 
1f855fb
f6a9c5a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1f855fb
f6a9c5a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1f855fb
 
f6a9c5a
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
library(shiny)
library(ellmer)
library(purrr)

# Default number of field definitions: used as the initial value of the
# "num_fields" input and restored when "Load Examples" is clicked.
num_example_fields <- 2

# User interface: credits banner, usage instructions, an input sidebar
# (recall text + field definitions) and a results panel.
ui <- shiny::fluidPage(
  # Banner: tool name/version on the left, authors/contact on the right.
  shiny::fluidRow(
    shiny::column(
      width = 12,
      shiny::div(
        style = "background-color: #f8f9fa; padding: 10px; margin-bottom: 15px; border-radius: 5px;",
        shiny::div(
          style = "display: flex; justify-content: space-between; align-items: center;",
          shiny::div(
            shiny::strong("NHTSA Recall Information Extraction Tool"),
            shiny::p("Version 1.0 - April 2025")
          ),
          shiny::div(
            shiny::p("Authors: Fadel M. Megahed, Ying-Ju (Tessa) Chen"),
            shiny::p("Contact: [email protected]")
          )
        )
      )
    )
  ),

  shiny::titlePanel("NHTSA Recall Information Extraction"),

  # Step-by-step usage instructions.
  shiny::fluidRow(
    shiny::column(
      width = 12,
      shiny::wellPanel(
        shiny::h4("How to Use This App"),
        shiny::p("This app extracts structured data from NHTSA recall notices using AI. Follow these steps:"),
        shiny::tags$ol(
          shiny::tags$li("Paste recall text containing information you want to extract"),
          shiny::tags$li("Specify the number of fields to extract"),
          shiny::tags$li("Define each field with a label and description"),
          shiny::tags$li("Click 'Extract Data' to process")
        ),
        shiny::p("Example: For extracting recall information, create fields like 'manufacturer', 'models', and 'defect_summary' with clear descriptions."),
        shiny::p("You can process multiple recalls at once: separate each recall text with a double line break (press Enter twice).")
      )
    )
  ),

  shiny::sidebarLayout(
    # Inputs: recall text, number of fields, and the dynamically rendered
    # label/description pairs (output "fields_ui").
    shiny::sidebarPanel(
      shiny::textAreaInput(
        inputId = "input_text",
        label = "Enter recall text to extract from:",
        rows = 10,
        placeholder = "Paste your recall text here...\n\nSeparate multiple recalls with double line breaks (press Enter twice).\n\nExample: 'Ford Motor Company is recalling certain 2021-2022 vehicles due to faulty brakes.'"
      ),

      shiny::numericInput(
        inputId = "num_fields",
        label = "Number of fields to extract:",
        value = num_example_fields,
        min = 1,
        max = 10
      ),

      shiny::helpText("Define each field with a clear label (e.g., 'manufacturer') and description (e.g., 'The name of the company recalling the vehicles')."),

      shiny::uiOutput("fields_ui"),

      # Action buttons: load the sample recalls / run the extraction.
      shiny::actionButton("load_example", "Load Examples", class = "btn-info"),
      shiny::actionButton("extract_btn", "Extract Data", class = "btn-primary")
    ),

    # Outputs: the extracted table followed by static guidance panels.
    shiny::mainPanel(
      shiny::h3("Extracted Recall Data"),
      shiny::p("Results will appear here after extraction"),
      shiny::tableOutput("extracted_table"),

      shiny::wellPanel(
        shiny::h4("Tips for Better Results"),
        shiny::tags$ul(
          shiny::tags$li("Use specific field descriptions to guide the AI"),
          shiny::tags$li("Start with more fields and remove unnecessary ones later"),
          shiny::tags$li("If results are inaccurate, try rephrasing your field descriptions"),
          shiny::tags$li("To process multiple recalls, separate each with a double line break"),
          shiny::tags$li("Each recall text should contain complete information for all fields")
        )
      ),

      # Note on which model backs the demo.
      shiny::wellPanel(
        shiny::h4("Note:"),
        shiny::p("To ensure the timeliness of results (since this is hosted on a CPU), we utilize `gpt-4o-mini` for this demo.")
      )
    )
  )
)

# ---- Pure helpers used by the server (no Shiny/ellmer dependency) ----

# Coerce the "number of fields" input into a usable whole-number count.
# Returns 0L for NULL, NA (e.g. a cleared numeric box), non-scalar input, or
# values below 1; otherwise the value rounded down to an integer.
normalize_field_count <- function(n) {
  if (is.null(n) || length(n) != 1L || is.na(n) || n < 1) {
    return(0L)
  }
  as.integer(floor(n))
}

# Split raw text into individual recall texts.
# Blocks are separated by one or more blank (or whitespace-only) lines; CR/LF
# line endings are tolerated. Returns a character vector of trimmed,
# non-empty blocks (possibly of length zero).
split_recall_blocks <- function(text) {
  if (is.null(text) || length(text) == 0L || is.na(text[[1]])) {
    return(character(0))
  }
  normalised <- gsub("\r\n?", "\n", text[[1]])
  blocks <- unlist(strsplit(normalised, "\n[[:space:]]*\n"))
  blocks <- trimws(blocks)
  blocks[nzchar(blocks)]
}

# Turn one extraction result (a named list) into a one-row data frame that
# always has one column per requested field plus `block_id`.
# Fields that are absent or NULL in `result` become NA so that rows from
# different blocks share the same columns and can be row-bound safely.
# Multi-valued fields are collapsed into a single "; "-separated string.
extraction_to_row <- function(result, field_names, block_id) {
  cells <- lapply(field_names, function(field) {
    value <- result[[field]]
    if (is.null(value) || length(value) == 0L) {
      NA_character_
    } else {
      paste(as.character(unlist(value)), collapse = "; ")
    }
  })
  names(cells) <- field_names
  # Identify the source text block, as the last column.
  cells[["block_id"]] <- block_id
  as.data.frame(cells, stringsAsFactors = FALSE)
}

# Server logic: renders the dynamic field-definition inputs, builds the
# extraction schema from them, and runs the extraction when requested.
#
# Arguments are the standard Shiny server arguments:
#   input   - reads "load_example", "num_fields", "field_label_<i>",
#             "field_desc_<i>", "extract_btn" and "input_text".
#   output  - writes "fields_ui" and "extracted_table".
#   session - used to push updated values to the inputs.
server <- function(input, output, session) {

  # "Load Examples": fill the text area with two sample recalls and reset the
  # number of fields to the default.
  shiny::observeEvent(input$load_example, {
    example_text <- "Ford Motor Company (Ford) is recalling certain 2021-2022 Bronco vehicles equipped with rearview camera systems and 8-inch screen displays. The rearview camera image may still be displayed after a backing event has ended. As such, these vehicles fail to comply with the requirements of Federal Motor Vehicle Safety Standard number 111, \"Rear Visibility.\"\n\nHonda (American Honda Motor Co.) is recalling certain 2022-2025 Acura MDX Type-S, 2023-2025 Honda Pilot, and 2021-2025 Acura TLX Type-S vehicles. A software error in the fuel injection electronic control unit (FI-ECU) may cause an engine stall or a loss of power."
    shiny::updateTextAreaInput(session, "input_text", value = example_text)
    shiny::updateNumericInput(session, "num_fields", value = num_example_fields)
  })

  # One label/description input pair per requested field, pre-filled with
  # NHTSA-oriented examples where available.
  output$fields_ui <- shiny::renderUI({
    n <- normalize_field_count(input$num_fields)
    if (n < 1L) {
      return(NULL)
    }

    example_labels <- c(
      "manufacturer", "defect_summary", "models", "model_years",
      "component", "fmvss_number", "root_cause", "risk"
    )
    example_descs <- c(
      "The name of the company recalling the vehicles.",
      "Summary of the main defect.",
      "List of affected vehicle models.",
      "List of model years affected.",
      "The part or system affected by the defect.",
      "The FMVSS number mentioned, if any.",
      "The root cause of the defect.",
      "The risk or consequence posed by the defect."
    )

    fields <- purrr::map(seq_len(n), function(i) {
      # Fall back to generic defaults once the examples run out.
      default_label <- if (i <= length(example_labels)) {
        example_labels[i]
      } else {
        paste0("field", i)
      }
      default_desc <- if (i <= length(example_descs)) {
        example_descs[i]
      } else {
        paste0("Description for field ", i)
      }

      shiny::tagList(
        shiny::textInput(
          paste0("field_label_", i),
          paste("Field", i, "Label:"),
          value = default_label
        ),
        shiny::textInput(
          paste0("field_desc_", i),
          paste("Field", i, "Description:"),
          value = default_desc
        ),
        shiny::hr()
      )
    })
    do.call(shiny::tagList, fields)
  })

  # Named list of field descriptions keyed by (trimmed) label. Fields whose
  # label is missing or blank are skipped; a repeated label keeps the last
  # description entered.
  field_definitions <- shiny::reactive({
    n <- normalize_field_count(input$num_fields)
    defs <- list()
    for (i in seq_len(n)) {
      label <- input[[paste0("field_label_", i)]]
      desc <- input[[paste0("field_desc_", i)]]
      if (is.null(label)) {
        next
      }
      label <- trimws(label)
      if (nzchar(label)) {
        # `[<-` with list() keeps the entry even when `desc` is NULL.
        defs[label] <- list(desc)
      }
    }
    defs
  })

  # Build the ellmer object type from the user-specified fields.
  # Returns NULL when no usable field is defined.
  create_type_object <- shiny::reactive({
    defs <- field_definitions()
    if (length(defs) == 0L) {
      return(NULL)
    }
    type_list <- lapply(defs, function(desc) {
      ellmer::type_string(desc, required = FALSE)
    })
    do.call(ellmer::type_object, type_list)
  })

  # "Extract Data": run one extraction per text block and show the combined
  # table. Any failure is reported as an error notification.
  shiny::observeEvent(input$extract_btn, {
    shiny::req(input$input_text)

    shiny::showNotification(
      "Processing extraction request...",
      type = "message", duration = NULL, id = "extract_notif"
    )

    tryCatch({
      api_key <- Sys.getenv("OPENAI_API_KEY")
      if (!nzchar(api_key)) {
        stop(
          "OpenAI API key not found. Please set the OPENAI_API_KEY environment variable.",
          call. = FALSE
        )
      }

      # Resolved inside tryCatch so schema-building errors are reported too.
      custom_type_object <- create_type_object()
      if (is.null(custom_type_object)) {
        stop(
          "Please define at least one field with a non-empty label.",
          call. = FALSE
        )
      }
      field_names <- names(field_definitions())

      text_blocks <- split_recall_blocks(input$input_text)

      rows <- lapply(seq_along(text_blocks), function(i) {
        # A fresh chat per block keeps each extraction independent of any
        # conversation state left behind by earlier blocks.
        # NOTE(review): confirm history behaviour for the installed ellmer.
        chat <- ellmer::chat_openai(
          model = "gpt-4o-mini",
          api_key = api_key
        )
        result <- chat$extract_data(text_blocks[[i]], type = custom_type_object)
        if (!is.list(result)) {
          return(NULL)
        }
        extraction_to_row(result, field_names, i)
      })
      rows <- Filter(Negate(is.null), rows)

      if (length(rows) > 0L) {
        # Every row has identical columns, so rbind cannot mismatch.
        combined_results <- do.call(rbind, rows)
        output$extracted_table <- shiny::renderTable({
          combined_results
        }, rownames = TRUE)
      } else {
        output$extracted_table <- shiny::renderTable({
          data.frame(message = "No valid data could be extracted")
        })
      }

      shiny::showNotification("Extraction complete!", type = "message", duration = 3)
    }, error = function(e) {
      shiny::showNotification(
        paste("Error:", conditionMessage(e)),
        type = "error", duration = NULL
      )
    }, finally = {
      # Clear the "processing" notification on success and on failure.
      shiny::removeNotification(id = "extract_notif")
    })
  })
}

# Build the app object from the UI definition and server function; as the
# file's last expression it is what gets launched when the file is run.
shiny::shinyApp(ui, server)