CCCDev committed on
Commit
588bc8d
·
verified ·
1 Parent(s): 3842d8b

Upload resnet50_classification.py

Browse files
Files changed (1) hide show
  1. resnet50_classification.py +63 -0
resnet50_classification.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from tensorflow.keras.applications import ResNet50
3
+ from tensorflow.keras.preprocessing import image
4
+ from tensorflow.keras.applications.resnet50 import preprocess_input
5
+ from sklearn.metrics.pairwise import cosine_similarity
6
+ import os
7
+
8
# Pre-trained ImageNet ResNet50 without its classification head; with
# pooling='avg' the network output is globally average-pooled, so each
# image yields one feature vector.
model = ResNet50(
    include_top=False,
    pooling='avg',
    weights='imagenet',
)
10
+
11
+
12
+ # Function to extract feature vector from an image
13
def extract_features(img_path, model):
    """Return the model's output for one image as a flat feature vector.

    Args:
        img_path: Path of the image file to load.
        model: Object whose ``predict`` method accepts a preprocessed
            batch of images (here, the module-level ResNet50).

    Returns:
        The prediction for the single-image batch, flattened to 1-D.
    """
    # Load at 224x224 and convert to an array.
    pixels = image.img_to_array(image.load_img(img_path, target_size=(224, 224)))
    # Add a leading batch axis: predict() expects a batch, not a single image.
    batch = preprocess_input(pixels[np.newaxis, ...])
    return model.predict(batch).flatten()
20
+
21
+
22
# Directory containing images
image_dir = './images/forward_facing'

# Extract features for every regular file in the directory.  Names are
# sorted because os.listdir() returns entries in arbitrary order, and the
# order decides which member of a similar pair is kept (the earlier one).
image_features = {}
for img_file in sorted(os.listdir(image_dir)):
    img_path = os.path.join(image_dir, img_file)
    if not os.path.isfile(img_path):
        # Sub-directories cannot be loaded as images.
        continue
    try:
        image_features[img_file] = extract_features(img_path, model)
    except OSError as err:
        # Unreadable or non-image files (PIL's UnidentifiedImageError is an
        # OSError subclass) are reported and skipped instead of aborting
        # the whole run.
        print(f"Skipping {img_file}: {err}")

# Parallel lists: feature_list[k] is the feature vector of file_list[k].
feature_list = list(image_features.values())
file_list = list(image_features.keys())

# Calculate all pairwise cosine similarities in one call instead of one
# call per pair.
num_images = len(file_list)
if num_images:
    similarity_matrix = cosine_similarity(np.vstack(feature_list))
    # An image is never its own duplicate; keep the diagonal at zero.
    np.fill_diagonal(similarity_matrix, 0.0)
else:
    # cosine_similarity rejects empty input; an empty directory simply
    # yields an empty matrix and no duplicates.
    similarity_matrix = np.zeros((0, 0))

# Identify duplicates
threshold = 0.9  # Similarity threshold for duplicates
duplicates = set()
for i, kept_name in enumerate(file_list):
    if kept_name in duplicates:
        # Only images that are kept act as references.  Comparing against
        # an image that is itself flagged would also flag C in a chain
        # A~B~C even when C does not resemble the kept image A.
        continue
    later = np.flatnonzero(similarity_matrix[i, i + 1:] > threshold) + i + 1
    duplicates.update(file_list[j] for j in later)

# Remove duplicates (destructive step, intentionally left disabled)
# for duplicate in duplicates:
#     os.remove(os.path.join(image_dir, duplicate))
print("Duplicate Images No => ", len(duplicates))

# print(f"Removed {len(duplicates)} duplicate images.")