File size: 1,377 Bytes
1d7c63d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
from sklearn.decomposition import PCA
import pickle as pk
import numpy as np
import pandas as pd



# Load the precomputed artifacts used by the lookup functions in this module.
# NOTE(security): pickle can execute arbitrary code while loading, so these
# .pkl files must only ever come from a trusted source.
# The context managers close each file as soon as it has been read instead of
# leaving the handle open until garbage collection.
with open('pca_fossils_170_finer.pkl', 'rb') as _pkl_file:
    pca_fossils = pk.load(_pkl_file)
with open('pca_leaves_170_finer.pkl', 'rb') as _pkl_file:
    pca_leaves = pk.load(_pkl_file)
embedding_fossils = np.load('embedding_fossils_170_finer.npy')
# Loading of the leaves embedding is currently disabled:
#embedding_leaves = np.load('embedding_leaves.npy')

# CSV table of fossil files; get_images reads its 'file_name' column.
fossils_pd = pd.read_csv('fossils_paths.csv')

def pca_distance(pca, sample, embedding, *, k=5):
    """Return the indices of the ``k`` reference rows nearest to ``sample``.

    Both ``sample`` and the reference rows are projected with
    ``pca.transform`` and then compared by Euclidean distance.

    Args:
        pca: Object exposing ``transform(X)`` (presumably a fitted
            ``sklearn.decomposition.PCA`` -- confirm against the caller).
        sample: Array holding one sample; it is reshaped to a single row.
        embedding: Reference array. Only ``embedding[:, -1]`` is projected.
            NOTE(review): this looks like it expects a 3-D array whose last
            entry along axis 1 is the feature vector -- confirm.
        k: Maximum number of indices to return. Defaults to 5, the value
            that used to be hard-coded.

    Returns:
        Array of at most ``k`` indices into the first axis of ``embedding``,
        ordered from nearest to farthest.

    Raises:
        ValueError: If ``k`` is negative (a negative slice bound would
            silently drop the farthest entries instead of limiting the count).
    """
    if k < 0:
        raise ValueError("k must be non-negative, got {!r}".format(k))
    projected_sample = pca.transform(sample.reshape(1, -1))
    # Named explicitly so the builtin ``all`` is not shadowed.
    projected_refs = pca.transform(embedding[:, -1])
    distances = np.linalg.norm(projected_refs - projected_sample, axis=1)
    # Diagnostic output retained from the original implementation.
    print(distances)
    return np.argsort(distances)[:k]

def return_paths(argsorted, files):
    """Return the entries of ``files`` at the positions listed in ``argsorted``.

    The result is a new list that follows the order of ``argsorted``.
    """
    return [files[position] for position in argsorted]


def get_images(embedding):
    """Return rewritten file paths of the fossils nearest to ``embedding``.

    The query is ranked against the module-level ``embedding_fossils`` via
    ``pca_distance`` using ``pca_fossils``; the selected indices are mapped to
    the ``file_name`` column of ``fossils_pd``. In each resulting path the
    cluster directory prefix is replaced with the mounted-share prefix.
    Paths are printed before and after the replacement.
    """
    source_root = '/gpfs/data/tserre/irodri15/Fossils/new_data/leavesdb-v1_1/images/Fossil/Florissant_Fossil/512/full/jpg/'
    target_root = '/media/data_cifs/projects/prj_fossils/data/processed_data/leavesdb-v1_1/images/Fossil/Florissant_Fossil/original/full/jpg/'

    nearest = pca_distance(pca_fossils, embedding, embedding_fossils)
    file_names = fossils_pd['file_name'].values

    matched = return_paths(nearest, file_names)
    print(matched)

    remapped = []
    for original_path in matched:
        remapped.append(original_path.replace(source_root, target_root))
    print(remapped)
    return remapped