nmrproject/ILP/nmrSimilarity.py

#Binning mostly for broader peaks?
#

import math
import numpy as np

#Xanthine
HXANTHINE = {
    1: ([7.96], [1]),
    2: ([9.45], [1]),
    3: ([7.725], [1]),
    4: ([7.625], [1]),
}
CXANTHINE = {
    1: ([159.40], [1]),
    2: ([164.01], [1]),
    3: ([120.94], [1]),
    4: ([161.24], [1]),
    5: ([146.98], [1]),
}

#1-Methylxanthine
H1XANTHINE = {
    1: ([7.93], [1]),
    2: ([9.45], [1]),
    3: ([4.05], [3]),
    4: ([7.91], [1]),
}
C1XANTHINE = {
    1: ([161.50], [1]),
    2: ([166.28], [1]),
    3: ([120.65], [1]),
    4: ([158.74], [1]),
    5: ([146.25], [1]),
    6: ([38.55], [1]),
}

#3-Methylxanthine
H3XANTHINE = {
    1: ([4.15], [3]),
    2: ([7.73], [1]),
    3: ([7.99], [1]),
    4: ([9.49], [1]),
}
C3XANTHINE = {
    1: ([161.83], [1]),
    2: ([163.37], [1]),
    3: ([121.24], [1]),
    4: ([163.15], [1]),
    5: ([146.49], [1]),
    6: ([39.71], [1]),
}

#7-Methylxanthine
H7XANTHINE = {
    1: ([7.55], [1]),
    2: ([4.47], [3]),
    3: ([7.72], [1]),
    4: ([7.655], [1]),
}
C7XANTHINE= {
    1: ([159.50], [1]),
    2: ([165.47], [1]),
    3: ([122.15], [1]),
    4: ([162.31], [1]),
    5: ([151.55], [1]),
    6: ([45.06], [1]),
}

#Theophylline
H13XANTHINE = {
    1: ([4.03], [3]),
    2: ([7.98], [1]),
    3: ([4.19], [3]),
    4: ([9.49], [1]),
}
C13XANTHINE = {
    1: ([163.77], [1]),
    2: ([165.26], [1]),
    3: ([120.73], [1]),
    4: ([160.99], [1]),
    5: ([145.80], [1]),
    6: ([40.42], [1]),
    7: ([37.60], [1]),
}

#Paraxanthine
H17XANTHINE = {
    1: ([4.50], [3]),
    2: ([7.70], [1]),
    3: ([3.98], [3]),
    4: ([7.82], [1]),
}
C17XANTHINE = {
    1: ([161.41], [1]),
    2: ([167.17], [1]),
    3: ([121.81], [1]),
    4: ([160.18], [1]),
    5: ([151.09], [1]),
    6: ([45.17], [1]),
    7: ([36.96], [1]),
}

CPARAXANTHINE = {
1: ([26.7], [1]),
2: ([32.9], [1]),
3: ([151.1], [1]),
4: ([106.5], [1]),
5: ([147.4], [1]),
6: ([155.3], [1]),
7: ([143.0], [1]),
}


#Theobromine
H37XANTHINE = {
    1: ([4.49], [3]),
    2: ([7.75], [1]),
    3: ([4.11], [3]),
    4: ([7.65], [1]),
}
C37XANTHINE = {
    1: ([161.76], [1]),
    2: ([164.86], [1]),
    3: ([122.51], [1]),
    4: ([164.29], [1]),
    5: ([151.10], [1]),
    6: ([39.33], [1]),
    7: ([45.07], [1]),
}

#Caffeine
H137XANTHINE = {
    1: ([7.73], [1]),
    2: ([4.15], [3]),
    3: ([4.52], [3]),
    4: ([4.01], [3]),
}
C137XANTHINE = {
    1: ([163.66], [1]),
    2: ([166.66], [1]),
    3: ([122.03], [1]),
    4: ([162.23], [1]),
    5: ([150.50], [1]),
    6: ([40.09], [1]),
    7: ([45.23], [1]),
    8: ([37.17], [1]),
}

CCAFFEINE = {
    1: ([155.7], [1]),
    2: ([148.8], [1]),
    3: ([107.7], [1]),
    4: ([152.2], [1]),
    5: ([143.0], [1]),
    6: ([27.2], [1]),
    7: ([29.1], [1]),
    8: ([32.9], [1]),
}

CCAFFEINE2 = {
1: ([27.7], [1]),
2: ([29.3], [1]),
3: ([33.1], [1]),
4: ([151.0], [1]),
5: ([148.1], [1]),
6: ([106.6], [1]),
7: ([154.5], [1]),
8: ([142.8], [1]),
}


#Experimental 7-Methylxanthine nmr
HNMR1= {
    1: ([10.85], [1]),
    2: ([11.50], [1]),
    3: ([3.82], [3]),
    4: ([7.88], [1]),
}

CNMR1= {
    1: ([155.85], [1]),
    2: ([151.35], [1]),
    3: ([149.30], [1]),
    4: ([143.01], [1]),
    5: ([106.90], [1]),
    6: ([33.03], [1]),
}


#Experimental Theobromine nmr
HNMR2= {
    1: ([11.10], [1]),
    2: ([3.33], [3]),
    3: ([3.84], [3]),
    4: ([7.97], [1]),
}

CNMR2= {
    1: ([154.9], [1]),
    2: ([149.8], [1]),
    3: ([107.1], [1]),
    4: ([151.0], [1]),
    5: ([142.8], [1]),
    6: ([29.3], [1]),
    7: ([33.9], [1]),
}

def overlap(listref, listnew):
    twoleft = np.sum(np.multiply(np.concatenate((listref, [0, 0])),  np.concatenate(([0, 0], listnew))))
    oneleft = np.sum(np.multiply(np.concatenate((listref, [0])),  np.concatenate(([0], listnew))))
    neutral = np.sum(np.multiply(listref,listnew))
    oneright = np.sum(np.multiply(np.concatenate(([0], listref)),  np.concatenate((listnew, [0]))))
    tworight = np.sum(np.multiply(np.concatenate(([0, 0], listref)),  np.concatenate((listnew, [0, 0]))))
    overlap = (oneleft + oneright)* 0.5 + neutral
    return overlap

def bin_array(spectra, highest_ppm, lowest_ppm, bin_width):
    binnumber = math.ceil((highest_ppm - lowest_ppm)/bin_width)
    bin = [0] * binnumber
    for peak in spectra:
        (shift, height) = spectra[peak]
        binindex = math.floor((shift[0] - lowest_ppm) / bin_width)
        bin[binindex] += height[0]
    normalizedbin = np.divide(bin, np.sum(bin))
    return normalizedbin

def define_border_values(spectraref, spectranew):
    shifts = []
    for _,(shift,_) in spectraref.items():
        shifts.append(shift[0])
    for _,(shift,_) in spectranew.items():
        shifts.append(shift[0])
    highest_ppm = math.ceil(max(shifts))
    lowest_ppm = math.floor(min(shifts))
    return (lowest_ppm, highest_ppm)

def similarity_nmr(spectraref, spectranew, bin_width):

    #Maximize likelihood or minimize Deviation
    #Values for two spectra and optimize largest for both different?
    #Spectra in Nodes to allow maximize overlapp with both spectra or one spectra.
    #5.4.2 Eliminating X–H signals from 1H NMR spectra
    lowest_ppm, highest_ppm = define_border_values(spectraref, spectranew)
    binref = bin_array(spectraref, highest_ppm, lowest_ppm, bin_width)
    binnew = bin_array(spectranew, highest_ppm, lowest_ppm, bin_width)
    crosscorr = overlap(binref, binnew)
    refselfcorr = overlap(binref, binref)
    newselfcorr = overlap(binnew, binnew)
    simidx = crosscorr / math.sqrt(refselfcorr * newselfcorr)
    return(simidx)

def main():
    positive = 0
    negative = 0
    bad_binwidth = []

    '''for i in [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0, 2.5, 3.0, 3.5, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 12.0, 14.0, 16.0, 18.0, 20.0, 25.0, 30.0, 35.0]:
        print(similarity_nmr(CNMR1, CNMR2, i), i)
        if(similarity_nmr(CCAFFEINE2, CCAFFEINE, i) - similarity_nmr(CPARAXANTHINE, CCAFFEINE, i) < 0):
            negative += 1
            bad_binwidth.append(i)
        else:
            positive += 1
    print(f'Wrong similarity result: {negative} and Right similarity result: {positive}')
    print(bad_binwidth)'''
    for i in np.arange(0.01, 0.07, 0.01):
        print(f'Increment i: {i}')
        print(similarity_nmr(HNMR1, HNMR2, i))
        print(similarity_nmr(H1XANTHINE, HNMR1, i))
        print(similarity_nmr(H3XANTHINE, HNMR1, i))
        print(similarity_nmr(H7XANTHINE, HNMR1, i))

if __name__ == "__main__":
    main()