nmrproject/ILP/nmrSimilarity.py

276 lines
6.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#Binning mostly for broader peaks?
#
import math
import numpy as np
#Xanthine
HXANTHINE = {
1: ([7.96], [1]),
2: ([9.45], [1]),
3: ([7.725], [1]),
4: ([7.625], [1]),
}
CXANTHINE = {
1: ([159.40], [1]),
2: ([164.01], [1]),
3: ([120.94], [1]),
4: ([161.24], [1]),
5: ([146.98], [1]),
}
#1-Methylxanthine
H1XANTHINE = {
1: ([7.93], [1]),
2: ([9.45], [1]),
3: ([4.05], [3]),
4: ([7.91], [1]),
}
C1XANTHINE = {
1: ([161.50], [1]),
2: ([166.28], [1]),
3: ([120.65], [1]),
4: ([158.74], [1]),
5: ([146.25], [1]),
6: ([38.55], [1]),
}
#3-Methylxanthine
H3XANTHINE = {
1: ([4.15], [3]),
2: ([7.73], [1]),
3: ([7.99], [1]),
4: ([9.49], [1]),
}
C3XANTHINE = {
1: ([161.83], [1]),
2: ([163.37], [1]),
3: ([121.24], [1]),
4: ([163.15], [1]),
5: ([146.49], [1]),
6: ([39.71], [1]),
}
#7-Methylxanthine
H7XANTHINE = {
1: ([7.55], [1]),
2: ([4.47], [3]),
3: ([7.72], [1]),
4: ([7.655], [1]),
}
C7XANTHINE= {
1: ([159.50], [1]),
2: ([165.47], [1]),
3: ([122.15], [1]),
4: ([162.31], [1]),
5: ([151.55], [1]),
6: ([45.06], [1]),
}
#Theophylline
H13XANTHINE = {
1: ([4.03], [3]),
2: ([7.98], [1]),
3: ([4.19], [3]),
4: ([9.49], [1]),
}
C13XANTHINE = {
1: ([163.77], [1]),
2: ([165.26], [1]),
3: ([120.73], [1]),
4: ([160.99], [1]),
5: ([145.80], [1]),
6: ([40.42], [1]),
7: ([37.60], [1]),
}
#Paraxanthine
H17XANTHINE = {
1: ([4.50], [3]),
2: ([7.70], [1]),
3: ([3.98], [3]),
4: ([7.82], [1]),
}
C17XANTHINE = {
1: ([161.41], [1]),
2: ([167.17], [1]),
3: ([121.81], [1]),
4: ([160.18], [1]),
5: ([151.09], [1]),
6: ([45.17], [1]),
7: ([36.96], [1]),
}
CPARAXANTHINE = {
1: ([26.7], [1]),
2: ([32.9], [1]),
3: ([151.1], [1]),
4: ([106.5], [1]),
5: ([147.4], [1]),
6: ([155.3], [1]),
7: ([143.0], [1]),
}
#Theobromine
H37XANTHINE = {
1: ([4.49], [3]),
2: ([7.75], [1]),
3: ([4.11], [3]),
4: ([7.65], [1]),
}
C37XANTHINE = {
1: ([161.76], [1]),
2: ([164.86], [1]),
3: ([122.51], [1]),
4: ([164.29], [1]),
5: ([151.10], [1]),
6: ([39.33], [1]),
7: ([45.07], [1]),
}
#Caffeine
H137XANTHINE = {
1: ([7.73], [1]),
2: ([4.15], [3]),
3: ([4.52], [3]),
4: ([4.01], [3]),
}
C137XANTHINE = {
1: ([163.66], [1]),
2: ([166.66], [1]),
3: ([122.03], [1]),
4: ([162.23], [1]),
5: ([150.50], [1]),
6: ([40.09], [1]),
7: ([45.23], [1]),
8: ([37.17], [1]),
}
CCAFFEINE = {
1: ([155.7], [1]),
2: ([148.8], [1]),
3: ([107.7], [1]),
4: ([152.2], [1]),
5: ([143.0], [1]),
6: ([27.2], [1]),
7: ([29.1], [1]),
8: ([32.9], [1]),
}
CCAFFEINE2 = {
1: ([27.7], [1]),
2: ([29.3], [1]),
3: ([33.1], [1]),
4: ([151.0], [1]),
5: ([148.1], [1]),
6: ([106.6], [1]),
7: ([154.5], [1]),
8: ([142.8], [1]),
}
#Experimental 7-Methylxanthine nmr
HNMR1= {
1: ([10.85], [1]),
2: ([11.50], [1]),
3: ([3.82], [3]),
4: ([7.88], [1]),
}
CNMR1= {
1: ([155.85], [1]),
2: ([151.35], [1]),
3: ([149.30], [1]),
4: ([143.01], [1]),
5: ([106.90], [1]),
6: ([33.03], [1]),
}
#Experimental Theobromine nmr
HNMR2= {
1: ([11.10], [1]),
2: ([3.33], [3]),
3: ([3.84], [3]),
4: ([7.97], [1]),
}
CNMR2= {
1: ([154.9], [1]),
2: ([149.8], [1]),
3: ([107.1], [1]),
4: ([151.0], [1]),
5: ([142.8], [1]),
6: ([29.3], [1]),
7: ([33.9], [1]),
}
def overlap(listref, listnew):
twoleft = np.sum(np.multiply(np.concatenate((listref, [0, 0])), np.concatenate(([0, 0], listnew))))
oneleft = np.sum(np.multiply(np.concatenate((listref, [0])), np.concatenate(([0], listnew))))
neutral = np.sum(np.multiply(listref,listnew))
oneright = np.sum(np.multiply(np.concatenate(([0], listref)), np.concatenate((listnew, [0]))))
tworight = np.sum(np.multiply(np.concatenate(([0, 0], listref)), np.concatenate((listnew, [0, 0]))))
overlap = (oneleft + oneright)* 0.5 + neutral
return overlap
def bin_array(spectra, highest_ppm, lowest_ppm, bin_width):
binnumber = math.ceil((highest_ppm - lowest_ppm)/bin_width)
bin = [0] * binnumber
for peak in spectra:
(shift, height) = spectra[peak]
binindex = math.floor((shift[0] - lowest_ppm) / bin_width)
bin[binindex] += height[0]
normalizedbin = np.divide(bin, np.sum(bin))
return normalizedbin
def define_border_values(spectraref, spectranew):
shifts = []
for _,(shift,_) in spectraref.items():
shifts.append(shift[0])
for _,(shift,_) in spectranew.items():
shifts.append(shift[0])
highest_ppm = math.ceil(max(shifts))
lowest_ppm = math.floor(min(shifts))
return (lowest_ppm, highest_ppm)
def similarity_nmr(spectraref, spectranew, bin_width):
#Maximize likelihood or minimize Deviation
#Values for two spectra and optimize largest for both different?
#Spectra in Nodes to allow maximize overlapp with both spectra or one spectra.
#5.4.2 Eliminating XH signals from 1H NMR spectra
lowest_ppm, highest_ppm = define_border_values(spectraref, spectranew)
binref = bin_array(spectraref, highest_ppm, lowest_ppm, bin_width)
binnew = bin_array(spectranew, highest_ppm, lowest_ppm, bin_width)
crosscorr = overlap(binref, binnew)
refselfcorr = overlap(binref, binref)
newselfcorr = overlap(binnew, binnew)
simidx = crosscorr / math.sqrt(refselfcorr * newselfcorr)
return(simidx)
def main():
positive = 0
negative = 0
bad_binwidth = []
'''for i in [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0, 2.5, 3.0, 3.5, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0, 25.0, 30.0, 35.0]:
print(similarity_nmr(CNMR1, CNMR2, i), i)
if(similarity_nmr(CCAFFEINE2, CCAFFEINE, i) - similarity_nmr(CPARAXANTHINE, CCAFFEINE, i) < 0):
negative += 1
bad_binwidth.append(i)
else:
positive += 1
print(f'Wrong similarity result: {negative} and Right similarity result: {positive}')
print(bad_binwidth)'''
for i in np.arange(0.01, 0.07, 0.01):
print(f'Increment i: {i}')
print(similarity_nmr(HNMR1, HNMR2, i))
print(similarity_nmr(H1XANTHINE, HNMR1, i))
print(similarity_nmr(H3XANTHINE, HNMR1, i))
print(similarity_nmr(H7XANTHINE, HNMR1, i))
if __name__ == "__main__":
main()