mcs07 · mcs07 · Feb 2, 2017 · Jan 25, 2017 · mcs07 · Feb 2, 2017
diff --git a/chemdataextractor/doc/table.py b/chemdataextractor/doc/table.py
@@ -22,7 +22,7 @@
     ExtinctionHeadingParser, FluorescenceLifetimeHeadingParser, FluorescenceLifetimeCellParser, \
     ElectrochemicalPotentialHeadingParser, ElectrochemicalPotentialCellParser, IrHeadingParser, IrCellParser, \
     SolventCellParser, SolventHeadingParser, SolventInHeadingParser, UvvisAbsEmiQuantumYieldHeadingParser, \
-    UvvisAbsEmiQuantumYieldCellParser, MeltingPointHeadingParser, MeltingPointCellParser, TempInHeadingParser, \
+    UvvisAbsEmiQuantumYieldCellParser, MeltingPointHeadingParser, MeltingPointCellParser, GlassTransitionHeadingParser, GlassTransitionCellParser, TempInHeadingParser, \
     UvvisAbsDisallowedHeadingParser, UvvisEmiQuantumYieldHeadingParser, UvvisEmiQuantumYieldCellParser
 # TODO: Sort out the above import... import module instead
 from ..nlp.tag import NoneTagger
@@ -50,6 +50,7 @@ class Table(CaptionedElement):
         (FluorescenceLifetimeHeadingParser(), FluorescenceLifetimeCellParser()),
         (ElectrochemicalPotentialHeadingParser(), ElectrochemicalPotentialCellParser()),
         (MeltingPointHeadingParser(), MeltingPointCellParser()),
+        (GlassTransitionHeadingParser(), GlassTransitionCellParser()),
         (SolventHeadingParser(), SolventCellParser()),
         (SolventInHeadingParser(),),
         (TempInHeadingParser(),)

diff --git a/chemdataextractor/doc/text.py b/chemdataextractor/doc/text.py
@@ -26,6 +26,7 @@
 from ..parse.table import CaptionContextParser
 from ..parse.ir import IrParser
 from ..parse.mp import MpParser
+from ..parse.tg import TgParser
 from ..parse.nmr import NmrParser
 from ..parse.uvvis import UvvisParser
 from ..nlp.lexicon import ChemLexicon
@@ -267,7 +268,7 @@ def _repr_html_(self):
 
 class Paragraph(Text):
 
-    parsers = [CompoundParser(), ChemicalLabelParser(), NmrParser(), IrParser(), UvvisParser(), MpParser(), ContextParser()]
+    parsers = [CompoundParser(), ChemicalLabelParser(), NmrParser(), IrParser(), UvvisParser(), MpParser(), TgParser(), ContextParser()]
 
     def _repr_html_(self):
         return '<p class="cde-paragraph">' + self.text + '</p>'

diff --git a/chemdataextractor/model.py b/chemdataextractor/model.py
@@ -388,6 +388,14 @@ class MeltingPoint(BaseModel):
     apparatus = StringType(contextual=True)
 
 
+class GlassTransition(BaseModel):
+    """A glass transition temperature (Tg) record, stored in ``Compound.glass_transitions``."""
+    value = StringType()  # temperature text taken from the parsed 'value' element
+    units = StringType(contextual=True)  # temperature units text; may be supplied by a table heading
+    method = StringType(contextual=True)  # NOTE(review): presumably the measurement technique (e.g. DSC); no parser in this patch sets it
+    concentration = StringType(contextual=True)  # NOTE(review): no parser in this patch sets it; confirm intended use
+    concentration_units = StringType(contextual=True)  # NOTE(review): as above, currently never populated
+
 class QuantumYield(BaseModel):
     """A quantum yield measurement."""
     value = StringType()
@@ -437,6 +445,7 @@ class Compound(BaseModel):
     ir_spectra = ListType(ModelType(IrSpectrum))
     uvvis_spectra = ListType(ModelType(UvvisSpectrum))
     melting_points = ListType(ModelType(MeltingPoint))
+    glass_transitions = ListType(ModelType(GlassTransition))
     quantum_yields = ListType(ModelType(QuantumYield))
     fluorescence_lifetimes = ListType(ModelType(FluorescenceLifetime))
     electrochemical_potentials = ListType(ModelType(ElectrochemicalPotential))
@@ -460,6 +469,9 @@ def merge_contextual(self, other):
             for item in self[k]:
                 # print('item: %s' % item)
                 for other_item in other.get(k, []):
+                    # RBT: doi ma301230y yields plain text items here; skip them (type(u'') is unicode on Py2, str on Py3).
+                    if isinstance(other_item, type(u'')):
+                        continue
                     # if k in {'names', 'labels'}:
                     #     # TODO: Warn attempting to merge a contextual other that contains names/labels
                     #     continue

diff --git a/chemdataextractor/parse/__init__.py b/chemdataextractor/parse/__init__.py
@@ -23,13 +23,14 @@
 from .context import ContextParser
 from .ir import IrParser
 from .mp import MpParser
+from .tg import TgParser
 from .nmr import NmrParser
 from .table import CompoundHeadingParser, SolventHeadingParser, UvvisAbsDisallowedHeadingParser, SolventInHeadingParser
 from .table import TempInHeadingParser, SolventCellParser, CompoundCellParser, UvvisEmiHeadingParser
 from .table import UvvisAbsHeadingParser, ExtinctionHeadingParser, IrHeadingParser, IrCellParser
 from .table import QuantumYieldHeadingParser, QuantumYieldCellParser, UvvisEmiCellParser, UvvisAbsCellParser
 from .table import ExtinctionCellParser, UvvisAbsEmiQuantumYieldHeadingParser, UvvisAbsEmiQuantumYieldCellParser
 from .table import UvvisEmiQuantumYieldHeadingParser, UvvisEmiQuantumYieldCellParser, FluorescenceLifetimeHeadingParser
-from .table import FluorescenceLifetimeCellParser, MeltingPointHeadingParser, MeltingPointCellParser
+from .table import FluorescenceLifetimeCellParser, MeltingPointHeadingParser, MeltingPointCellParser, GlassTransitionHeadingParser, GlassTransitionCellParser
 from .table import ElectrochemicalPotentialHeadingParser, ElectrochemicalPotentialCellParser, CaptionContextParser
 from .uvvis import UvvisParser
diff --git a/chemdataextractor/parse/context.py b/chemdataextractor/parse/context.py
@@ -19,7 +19,7 @@
 from .common import optdelim, hyphen, slash
 from ..utils import first
 from ..parse.base import BaseParser
-from ..model import Compound, QuantumYield, NmrSpectrum, UvvisSpectrum, IrSpectrum, MeltingPoint, FluorescenceLifetime
+from ..model import Compound, QuantumYield, NmrSpectrum, UvvisSpectrum, IrSpectrum, MeltingPoint, GlassTransition, FluorescenceLifetime
 from .actions import join, merge, fix_whitespace
 from .cem import chemical_name
 from .elements import I, T, R, W, ZeroOrMore, Optional, Group, OneOrMore, Any, Not
@@ -33,13 +33,17 @@
 uvvis = (I('UV') + (hyphen | slash) + R('^vis(ible)?$', re.I) + Optional(R('^abs(or[bp]tion)?$')))('uvvis').add_action(join).add_action(fix_whitespace)
 ir = (R('^(FT-?)?IR|FT-?IS$'))('ir').add_action(join)
 mp = (I('melting') + I('points'))('melting_point').add_action(join)
+tg = (I('glass') + I('transition') + I('temperature'))('glass_transition').add_action(join)
 pp = (I('photophysical') + (I('measurements') | I('properties')))('photophysical_properties').add_action(join)
-measurement = Group(quantum_yield | nmr | uvvis | ir | mp | pp)('measurement')
+measurement = Group(quantum_yield | nmr | uvvis | ir | mp | tg | pp)('measurement')
 
 result_noun = I('data') | I('results') | I('experiments') | I('spectra')
 
 verb = W('measured') | W('recorded') | W('collected') | W('taken') | W('acquired') | W('obtained') | W('run') | (W('carried') + W('out') | W('performed')) # | T('VBN')
 
+# Measurement technique keywords; each abbreviation is an alternative, so all are joined with `|` (`+` would demand a sequence).
+method = I('DSC') | I('TMA') | I('DTA') | I('RTL') | I('DMA') | I('DMTA') | I('dilatometer') | I('dilatometry') | I('PALS')
+
 apparatus_type = R('^\d{2,}$') + W('MHz')
 brands = I('HORIBA') + I('Jobin') + I('Yvon') | I('Hitachi') | I('Bruker') | I('Cary') | I('Jeol') | I('PerkinElmer') | I('Agilent') | I('Shimadzu') | I('Varian')
 models = I('FluoroMax-4') | I('F-7000') | I('AVANCE') | I('Digital') | R('\d\d\d+') | I('UV–vis-NIR') | I('Mercury') | I('Avatar') | I('thermonicolet') | I('pulsed') | I('Fourier') | I('transform')
@@ -60,7 +64,7 @@
 standard = (ZeroOrMore(T('JJ')) + OneOrMore(T('NNP') | T('NN') | T('HYPH') | T('CD') | T('B-CM') | T('I-CM')))('standard').add_action(join).add_action(fix_whitespace)
 standard_phrase = (W('with') | W('using')).hide() + Optional(dt).hide() + standard + (ZeroOrMore(W('as') | dt) + Optional(T('JJ')) + I('standard')).hide()
 
-context_phrase = Group(measurement + optdelim + Optional(result_noun).hide() + Optional(T('VBD')).hide() + ZeroOrMore(Not(verb) + Any()).hide() + verb.hide() + OneOrMore(standard_phrase | apparatus_phrase | temperature_phrase | solvent_phrase | Any().hide()))('context_phrase')
+context_phrase = Group(measurement + optdelim + Optional(result_noun).hide() + Optional(T('VBD')).hide() + ZeroOrMore(Not(verb) + Any()).hide() + verb.hide() + OneOrMore(standard_phrase | apparatus_phrase | temperature_phrase | solvent_phrase | method | Any().hide()))('context_phrase')
 
 # TODO: Multiple measurements, multiple apparatus.
 # TODO: 'respectively' phrase
@@ -81,10 +85,11 @@ def interpret(self, result, start, end):
         }
         measurement = result.xpath('./measurement/*[1]')[0]
 
-        if not measurement.tag == 'melting_point':
+        if not measurement.tag == 'melting_point' and not measurement.tag =='glass_transition':
             context['temperature'] = first(result.xpath('./temperature/value/text()'))
             context['temperature_units'] = first(result.xpath('./temperature/units/text()'))
 
+
         if measurement.tag == 'photophysical_properties':
             c.quantum_yields.append(QuantumYield(**context))
             c.fluorescence_lifetimes.append(FluorescenceLifetime(**context))
@@ -93,6 +98,8 @@ def interpret(self, result, start, end):
             c.quantum_yields.append(QuantumYield(**context))
         if measurement.tag == 'melting_point':
             c.melting_points.append(MeltingPoint(**context))
+        if measurement.tag == 'glass_transition':
+            c.glass_transitions.append(GlassTransition(**context))
         if measurement.tag == 'nmr':
             c.nmr_spectra.append(NmrSpectrum(**context))
         if measurement.tag == 'uvvis':

diff --git a/chemdataextractor/parse/mp.py b/chemdataextractor/parse/mp.py
@@ -26,7 +26,7 @@
 
 log = logging.getLogger(__name__)
 
-prefix = Optional(I('a')).hide() + (R('^m\.?pt?\.?$', re.I) | I('melting') + Optional((I('point') | I('temperature')| I('range'))) | R('^m\.?$', re.I) + R('^pt?\.?$', re.I)).hide() + Optional(W('=') | I('of') | I('was') | I('is') | I('at')).hide() + Optional(I('in') + I('the') + I('range') + Optional(I('of')) | I('about')).hide()
+prefix = Optional(I('a')).hide() + (Optional(lbrct) + W('Tm') + Optional(rbrct)| R('^m\.?pt?\.?$', re.I) | I('melting') + Optional((I('point') | I('temperature')| I('range'))) | R('^m\.?$', re.I) + R('^pt?\.?$', re.I)).hide() + Optional(lbrct + W('Tm') + rbrct) + Optional(W('=') | I('of') | I('was') | I('is') | I('at')).hide() + Optional(I('in') + I('the') + I('range') + Optional(I('of')) | I('about')).hide()
 
 delim = R('^[:;\.,]$')
 
@@ -38,7 +38,7 @@
 to_range = (R('^[\+\-–−]?\d+(\.\d+)?$') + Optional(units).hide() + (I('to') + R('^[\+\-–−]?\d+(\.\d+)?$') | R('^[\+\-–−]\d+(\.\d+)?$')))('value').add_action(join)
 temp_range = (Optional(R('^[\-–−]$')) + (joined_range | spaced_range | to_range))('value').add_action(merge)
 temp_value = (Optional(R('^[~∼˜\<\>]$')) + Optional(R('^[\-–−]$')) + R('^[\+\-–−]?\d+(\.\d+)?$'))('value').add_action(merge)
-temp = (temp_range | temp_value)('value')
+temp = Optional(lbrct).hide() + (temp_range | temp_value)('value') + Optional(rbrct).hide()
 
 mp = (prefix + Optional(delim).hide() + temp + units)('mp')
 
@@ -52,7 +52,6 @@
 
 mp_phrase = cem_mp_phrase | to_give_mp_phrase | obtained_mp_phrase
 
-
 class MpParser(BaseParser):
     """"""
     root = mp_phrase

diff --git a/chemdataextractor/parse/table.py b/chemdataextractor/parse/table.py
@@ -19,7 +19,7 @@
 
 from .common import delim
 from ..utils import first
-from ..model import Compound, UvvisSpectrum, UvvisPeak, QuantumYield, FluorescenceLifetime, MeltingPoint
+from ..model import Compound, UvvisSpectrum, UvvisPeak, QuantumYield, FluorescenceLifetime, MeltingPoint, GlassTransition
 from ..model import ElectrochemicalPotential, IrSpectrum, IrPeak
 from .actions import join, merge, fix_whitespace
 from .base import BaseParser
@@ -201,6 +201,12 @@ def split_uvvis_shape(tokens, start, result):
     temp_with_optional_units + ZeroOrMore(delims.hide() + temp_with_optional_units)
 )('melting_point_cell')
 
+# Heading title: "Tg" or "Tg." as one token, or "T" followed by a "g"/"g." token (the trailing dot is optional).
+glass_transition_title = R('^T(g\.?)$', re.I) | W('T') + R('^(g\.?)?$')
+glass_transition_heading = (glass_transition_title.hide() + delims.hide() + Optional(temp_units))('glass_transition_heading')
+# Cell: one temp_with_optional_units, then any number of further ones each preceded by (hidden) delims.
+glass_transition_cell = (temp_with_optional_units + ZeroOrMore(delims.hide() + temp_with_optional_units))('glass_transition_cell')
+
 caption_context = Group(subject_phrase | solvent_phrase | temp_phrase)('caption_context')
 
 
@@ -241,11 +247,12 @@ def interpret(self, result, start, end):
         solvent = first(result.xpath('./name/text()'))
         if solvent is not None:
             context = {'solvent': solvent}
-            c.melting_points = [MeltingPoint(context)]
-            c.quantum_yields = [QuantumYield(context)]
-            c.fluorescence_lifetimes = [FluorescenceLifetime(context)]
-            c.electrochemical_potentials = [ElectrochemicalPotential(context)]
-            c.uvvis_spectra = [UvvisSpectrum(context)]
+            c.melting_points = [MeltingPoint(**context)]
+            c.glass_transitions = [GlassTransition(**context)]
+            c.quantum_yields = [QuantumYield(**context)]
+            c.fluorescence_lifetimes = [FluorescenceLifetime(**context)]
+            c.electrochemical_potentials = [ElectrochemicalPotential(**context)]
+            c.uvvis_spectra = [UvvisSpectrum(**context)]
         if c.serialize():
             yield c
 
@@ -261,10 +268,11 @@ def interpret(self, result, start, end):
             'temperature': first(result.xpath('./value/text()')),
             'temperature_units': first(result.xpath('./units/text()'))
         }
-        c.quantum_yields = [QuantumYield(context)]
-        c.fluorescence_lifetimes = [FluorescenceLifetime(context)]
-        c.electrochemical_potentials = [ElectrochemicalPotential(context)]
-        c.uvvis_spectra = [UvvisSpectrum(context)]
+        # RBT: unpack context as keyword arguments (the ** was missing here as well).
+        c.quantum_yields = [QuantumYield(**context)]
+        c.fluorescence_lifetimes = [FluorescenceLifetime(**context)]
+        c.electrochemical_potentials = [ElectrochemicalPotential(**context)]
+        c.uvvis_spectra = [UvvisSpectrum(**context)]
         yield c
 
 
@@ -278,11 +286,13 @@ def interpret(self, result, start, end):
         solvent = first(result.xpath('./name/text()'))
         if solvent is not None:
             context = {'solvent': solvent}
-            c.melting_points = [MeltingPoint(context)]
-            c.quantum_yields = [QuantumYield(context)]
-            c.fluorescence_lifetimes = [FluorescenceLifetime(context)]
-            c.electrochemical_potentials = [ElectrochemicalPotential(context)]
-            c.uvvis_spectra = [UvvisSpectrum(context)]
+            # RBT: unpack context as keyword arguments (added the missing **).
+            c.melting_points = [MeltingPoint(**context)]
+            c.glass_transitions = [GlassTransition(**context)]
+            c.quantum_yields = [QuantumYield(**context)]
+            c.fluorescence_lifetimes = [FluorescenceLifetime(**context)]
+            c.electrochemical_potentials = [ElectrochemicalPotential(**context)]
+            c.uvvis_spectra = [UvvisSpectrum(**context)]
         if c.serialize():
             yield c
 
@@ -597,6 +607,37 @@ def interpret(self, result, start, end):
             yield c
 
 
+class GlassTransitionHeadingParser(BaseParser):
+    """Parser for table column headings matched by ``glass_transition_heading``."""
+    root = glass_transition_heading
+
+    def interpret(self, result, start, end):
+        """Yield a Compound holding the heading's temperature units, if any were matched."""
+        compound = Compound()
+        heading_units = first(result.xpath('./units/text()'))
+        # Without units the (empty) compound is still yielded.
+        if heading_units:
+            unit_only = GlassTransition(units=heading_units)
+            compound.glass_transitions.append(unit_only)
+        yield compound
+
+class GlassTransitionCellParser(BaseParser):
+    """Parser for table cells matched by ``glass_transition_cell``."""
+    root = glass_transition_cell
+
+    def interpret(self, result, start, end):
+        """Yield a Compound with one GlassTransition per ``temp`` element; yield nothing if none were found."""
+        c = Compound()
+        for tg in result.xpath('./temp'):
+            c.glass_transitions.append(
+                GlassTransition(
+                    value=first(tg.xpath('./value/text()')),
+                    units=first(tg.xpath('./units/text()'))
+                )
+            )
+        if c.glass_transitions:
+            yield c
+
 class ElectrochemicalPotentialHeadingParser(BaseParser):
     """"""
     root = electrochemical_potential_heading
@@ -650,6 +691,13 @@ def interpret(self, result, start, end):
         if context:
             c.melting_points = [MeltingPoint(**context)]
         temp = first(result.xpath('./temp_phrase'))
+        # Glass transition temperature shouldn't have contextual temperature:
+        # a Tg is itself a temperature. So, like the melting point above, it
+        # is built from the context before the caption's temperature keys are
+        # merged in by the block below (GlassTransition also declares no
+        # temperature fields).
+        if context:
+            c.glass_transitions = [GlassTransition(**context)]
         if temp is not None:
             context['temperature'] = first(temp.xpath('./temp/value/text()'))
             context['temperature_units'] = first(temp.xpath('./temp/units/text()'))

diff --git a/chemdataextractor/parse/tg.py b/chemdataextractor/parse/tg.py
@@ -0,0 +1,76 @@
+# -*- coding: utf-8 -*-
+"""
+chemdataextractor.parse.tg
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Glass transition temperature text parser.
+
+:copyright: Copyright 2016 by Matt Swain.
+:license: MIT, see LICENSE file for more details.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+import logging
+import re
+
+from chemdataextractor.parse.cem import cem, chemical_label, lenient_chemical_label, solvent_name
+from chemdataextractor.parse.common import lbrct, dt, rbrct, hyphen
+from ..utils import first
+from ..model import Compound, GlassTransition 
+from .actions import merge, join
+from .base import BaseParser
+from .elements import W, I, R, Optional, Any, OneOrMore, Not, ZeroOrMore
+
+log = logging.getLogger(__name__)
+
+# Lead-in to a Tg value: "Tg"/"(Tg)", "glass [transition] [temperature|range|temp.]" or "transition [...]", an optional "(Tg)", an optional "="/"of"/"was"/"is"/"at", then an optional approximation phrase.
+prefix = Optional(I('a')).hide() + (Optional(lbrct) + W('Tg') + Optional(rbrct) | I('glass') + Optional(I('transition')) + Optional((I('temperature') | I('range') | I('temp.'))) | W('transition') + Optional((I('temperature') | I('range') | I('temp.')))).hide() + Optional(lbrct + W('Tg') + rbrct) + Optional(W('=') | I('of') | I('was') | I('is') | I('at')).hide() + Optional(I('in') + I('the') + I('range') + Optional(I('of')) | I('about') | I('around') | I('ca') | I('ca.')).hide()
+
+delim = R('^[:;\.,]$')
+
+# TODO: Consider allowing degree symbol to be optional. The prefix should be restrictive enough to stop false positives.
+units = (W('°') + Optional(R('^[CFK]\.?$')) | R('^K\.?$'))('units').add_action(merge)  # bare kelvin uses R(): 'K\.?' is a regex pattern, not a literal word
+
+joined_range = R('^[\+\-–−]?\d+(\.\d+)?[\-–−~∼˜]\d+(\.\d+)?$')('value').add_action(merge)
+spaced_range = (R('^[\+\-–−]?\d+(\.\d+)?$') + Optional(units).hide() + (R('^[\-–−~∼˜]$') + R('^[\+\-–−]?\d+(\.\d+)?$') | R('^[\+\-–−]\d+(\.\d+)?$')))('value').add_action(merge)
+to_range = (R('^[\+\-–−]?\d+(\.\d+)?$') + Optional(units).hide() + (I('to') + R('^[\+\-–−]?\d+(\.\d+)?$') | R('^[\+\-–−]\d+(\.\d+)?$')))('value').add_action(join)
+temp_range = (Optional(R('^[\-–−]$')) + (joined_range | spaced_range | to_range))('value').add_action(merge)
+temp_value = (Optional(R('^[~∼˜\<\>]$')) + Optional(R('^[\-–−]$')) + R('^[\+\-–−]?\d+(\.\d+)?$'))('value').add_action(merge)
+temp = Optional(lbrct).hide() + (temp_range | temp_value)('value') + Optional(rbrct).hide()  # a range or single value, optionally wrapped in (hidden) brackets
+
+tg = (prefix + Optional(delim).hide() + temp + units)('tg')  # prefix, optional delimiter, temperature, then mandatory units
+
+bracket_any = lbrct + OneOrMore(Not(tg) + Not(rbrct) + Any()) + rbrct  # bracketed tokens that are neither the start of a tg match nor a closing bracket
+
+cem_tg_phrase = (Optional(cem) + Optional(I('having')).hide() + Optional(delim).hide() + Optional(bracket_any).hide() + Optional(delim).hide() + Optional(lbrct) + tg + Optional(rbrct))('tg_phrase')
+
+obtained_tg_phrase = ((cem | chemical_label) + (I('is') | I('are') | I('was')).hide() + (I('measured') | I('obtained') | I('yielded')).hide() + ZeroOrMore(Not(tg) + Not(cem) + Any()).hide() + tg)('tg_phrase')
+
+# NOTE: method1_phrase / method2_phrase / method3_phrase alternatives were drafted for tg_phrase but are not enabled.
+tg_phrase = cem_tg_phrase | obtained_tg_phrase
+
+
+class TgParser(BaseParser):
+    """Parser for glass transition temperature phrases matched by ``tg_phrase``."""
+    root = tg_phrase
+
+    def interpret(self, result, start, end):
+        """Yield a Compound carrying the Tg value and units found in ``result``.
+
+        The compound's names and labels are filled in only when the match
+        contains a ``cem`` element.
+        """
+        tg_value = first(result.xpath('./tg/value/text()'))
+        tg_units = first(result.xpath('./tg/units/text()'))
+        transition = GlassTransition(value=tg_value, units=tg_units)
+        record = Compound(glass_transitions=[transition])
+        # The chemical entity mention is optional in cem_tg_phrase.
+        chemical = first(result.xpath('./cem'))
+        if chemical is not None:
+            record.names = chemical.xpath('./name/text()')
+            record.labels = chemical.xpath('./label/text()')
+        yield record
+