import math

import numpy as np

# NOTE(review): this code is the post-patch side of a diff.  HXANTHINE,
# C3XANTHINE, C7XANTHINE and HNMR1 are only partially visible there, and
# H1XANTHINE / H3XANTHINE are not visible at all; they are presumably defined
# in the unchanged parts of ILP/nmrSimilarity.py -- confirm before running
# main().

# Every spectrum below maps a peak key to a ``(shifts, heights)`` pair of
# lists; only the first element of each list is read by bin_array().

# 7-Methylxanthine
H7XANTHINE = {
    1: ([7.55], [1]),
    2: ([4.47], [3]),
    3: ([7.72], [1]),
    4: ([7.655], [1]),
}

# Experimental theobromine NMR
HNMR2 = {
    1: ([11.10], [1]),
    2: ([3.33], [3]),
    3: ([3.84], [3]),
    4: ([7.97], [1]),
}

CNMR2 = {
    1: ([154.9], [1]),
    2: ([149.8], [1]),
    3: ([107.1], [1]),
    4: ([151.0], [1]),
    5: ([142.8], [1]),
    6: ([29.3], [1]),
    7: ([33.9], [1]),
}


def overlap(listref, listnew):
    """Return the neighbour-tolerant overlap of two binned spectra.

    The score is the dot product of the two vectors plus half of the dot
    products obtained after shifting one vector by a single bin in either
    direction, so intensity that lands in an adjacent bin still counts with
    weight 0.5.

    Args:
        listref: 1-D sequence of bin intensities of the reference spectrum.
        listnew: 1-D sequence of bin intensities of the compared spectrum;
            must have the same length as ``listref``.

    Returns:
        The overlap score as a float.

    Raises:
        ValueError: If the inputs are not one-dimensional or differ in length.
    """
    ref = np.asarray(listref, dtype=float)
    new = np.asarray(listnew, dtype=float)
    if ref.ndim != 1 or ref.shape != new.shape:
        # Without this check a length-1 input would silently broadcast.
        raise ValueError(
            "overlap() needs two 1-D inputs of equal length, got shapes "
            f"{ref.shape} and {new.shape}"
        )

    aligned = np.dot(ref, new)
    # ref[i] paired with new[i - 1], and ref[i] paired with new[i + 1].
    neighbours = np.dot(ref[1:], new[:-1]) + np.dot(ref[:-1], new[1:])
    return float(aligned + 0.5 * neighbours)


def bin_array(spectra, highest_ppm, lowest_ppm, bin_width):
    """Bin a peak list into a normalised intensity histogram.

    The window ``[lowest_ppm, highest_ppm]`` is split into
    ``ceil((highest_ppm - lowest_ppm) / bin_width)`` bins.  As with
    ``numpy.histogram``, peaks outside the window are ignored and a peak
    exactly on the upper edge is counted in the last bin.

    Args:
        spectra: Mapping of peak key to ``(shifts, heights)``; the first
            element of each list is used as the peak position and intensity.
        highest_ppm: Upper edge of the binning window.
        lowest_ppm: Lower edge of the binning window.
        bin_width: Width of one bin, in the same unit as the shifts.

    Returns:
        A 1-D ``numpy.ndarray`` of bin intensities that sums to 1, or an
        all-zero array when no intensity falls inside the window.

    Raises:
        ValueError: If ``bin_width`` is not positive or the window is empty.
    """
    if bin_width <= 0:
        raise ValueError("bin_width must be positive")
    if highest_ppm <= lowest_ppm:
        raise ValueError("highest_ppm must be greater than lowest_ppm")

    bin_count = math.ceil((highest_ppm - lowest_ppm) / bin_width)
    bins = np.zeros(bin_count, dtype=float)

    for shifts, heights in spectra.values():
        position = shifts[0]
        if not lowest_ppm <= position <= highest_ppm:
            # A negative index would otherwise wrap around to the far end of
            # the array and put the peak into an unrelated bin.
            continue
        index = math.floor((position - lowest_ppm) / bin_width)
        # position == highest_ppm can yield index == bin_count.
        bins[min(index, bin_count - 1)] += heights[0]

    total = bins.sum()
    if total == 0:
        # Nothing to normalise; avoid a division by zero that yields NaNs.
        return bins
    return bins / total


def similarity_nmr(spectraref, spectranew, bin_width, highest_ppm, lowest_ppm):
    """Return a normalised similarity index between two peak lists.

    Both spectra are binned with bin_array() and compared with overlap(); the
    cross overlap is divided by the geometric mean of the two self overlaps,
    so identical spectra score 1.0.

    Args:
        spectraref: Reference spectrum (mapping accepted by bin_array()).
        spectranew: Spectrum to compare against the reference.
        bin_width: Width of one bin.
        highest_ppm: Upper edge of the binning window.
        lowest_ppm: Lower edge of the binning window.

    Returns:
        The similarity index as a float; 0.0 when either spectrum has no
        intensity inside the window.

    Raises:
        ValueError: Propagated from bin_array() for an invalid window or
            bin width.
    """
    # Open design questions carried over from the original notes:
    #  - maximise likelihood or minimise deviation?
    #  - score each of two spectra separately and optimise the larger one?
    #  - keep spectra on the nodes so the overlap can be maximised against
    #    one spectrum or both.
    #  - "5.4.2 Eliminating X-H signals from 1H NMR spectra".
    binref = bin_array(spectraref, highest_ppm, lowest_ppm, bin_width)
    binnew = bin_array(spectranew, highest_ppm, lowest_ppm, bin_width)

    cross = overlap(binref, binnew)
    norm = math.sqrt(overlap(binref, binref) * overlap(binnew, binnew))
    if norm == 0:
        # At least one spectrum is empty inside the window.
        return 0.0
    return cross / norm


def main():
    """Print the similarity of three xanthine spectra to HNMR1."""
    # H1XANTHINE, H3XANTHINE and HNMR1 are module-level spectra that are not
    # part of the visible patch hunks.
    print(similarity_nmr(H1XANTHINE, HNMR1, 0.4, 13, 3))
    print(similarity_nmr(H3XANTHINE, HNMR1, 0.4, 13, 3))
    print(similarity_nmr(H7XANTHINE, HNMR1, 0.4, 13, 3))


if __name__ == "__main__":
    main()