# Source code for PyOpenWorm.evidence

from PyOpenWorm import *



class EvidenceError(Exception):
    """Exception type declared by the evidence module."""

def _pubmed_uri_to_pmid(uri):
    """Extract the integer PubMed id from a PubMed URI.

    Parameters
    ----------
    uri : string
        A PubMed URL, e.g. ``http://www.ncbi.nlm.nih.gov/pubmed/24098140``
        or ``https://pubmed.ncbi.nlm.nih.gov/24098140/``

    Returns
    -------
    int
        The PubMed id found in the URI path

    Raises
    ------
    IndexError, ValueError
        If no numeric id can be found in the path
    """
    try:
        from urllib.parse import urlparse  # Python 3
    except ImportError:
        from urlparse import urlparse  # Python 2
    segments = urlparse(uri).path.split("/")
    try:
        # Classic layout: /pubmed/<pmid>
        return int(segments[2])
    except (IndexError, ValueError):
        # Other layouts (e.g. /<pmid>/): take the first all-digit segment,
        # and re-raise the original error when there is none.
        for segment in segments:
            if segment.isdigit():
                return int(segment)
        raise

def _doi_uri_to_doi(uri):
    """Extract the DOI from a DOI resolver URI.

    Parameters
    ----------
    uri : string
        A URL whose path is the DOI, e.g.
        ``http://dx.doi.org/10.1007/s00454-010-9273-0``. The DOI may be
        percent-encoded.

    Returns
    -------
    string
        The decoded DOI, including both its prefix and suffix

    Raises
    ------
    IndexError
        If the URI path contains no "/"
    """
    try:
        from urllib.parse import urlparse, unquote  # Python 3
    except ImportError:
        from urlparse import urlparse  # Python 2
        from urllib2 import unquote
    parsed = urlparse(uri)
    # A DOI is "<prefix>/<suffix>", so only the leading "/" of the path is
    # dropped; splitting on every "/" would discard the suffix.
    doi = parsed.path.split("/", 1)[1]
    # the doi from a url needs to be decoded
    return unquote(doi)

def _url_request(url, headers=None, timeout=1):
    """Open `url` and return the response object.

    Parameters
    ----------
    url : string
        The URL to open
    headers : dict, optional
        Extra request headers. Defaults to no extra headers.
    timeout : number, optional
        Timeout in seconds passed to ``urlopen``. Defaults to 1.

    Returns
    -------
    file-like object or ""
        The open response, or the empty string when opening the URL fails
        with a ``URLError`` (which includes ``HTTPError``).
    """
    try:
        from urllib.request import Request, urlopen  # Python 3
        from urllib.error import URLError
    except ImportError:
        from urllib2 import Request, urlopen, URLError  # Python 2
    try:
        request = Request(url, headers=headers or {})
        return urlopen(request, timeout=timeout)
    except URLError:
        # HTTPError is a subclass of URLError, so this covers both cases
        return ""

def _json_request(url):
    """Fetch `url` and decode the response body as JSON.

    This is a best-effort helper: any ordinary error while requesting or
    decoding yields an empty dict rather than an exception.

    Parameters
    ----------
    url : string
        The URL to fetch

    Returns
    -------
    object
        The decoded JSON document, or ``{}`` on failure
    """
    import json
    headers = {'Content-Type': 'application/json'}
    response = None
    try:
        response = _url_request(url, headers)
        return json.load(response)
    except Exception:
        # Deliberately broad, but KeyboardInterrupt and SystemExit are no
        # longer swallowed as they were with BaseException.
        return {}
    finally:
        # _url_request returns "" on failure, which has nothing to close
        if hasattr(response, 'close'):
            response.close()

class AssertsAllAbout(Property):
    """Property named 'asserts_all_about'.

    Setting it calls the owner's ``asserts`` for the given object and for
    each of that object's properties.
    """
    # TODO: Needs tests!
    multiple=True

    def __init__(self, **kwargs):
        Property.__init__(self, 'asserts_all_about', **kwargs)

    def set(self, o, **kwargs):
        """Establish the "asserts" relationship for all of the properties of the given object"""
        evidence = self.owner
        evidence.asserts(o)
        for prop in o.properties:
            evidence.asserts(prop)

    def get(self, **kwargs):
        """Build (but do not run) a query for the asserted statements.

        Nothing is executed or returned here: the method implicitly returns
        None.
        """
        # traverse the hierarchy of ObjectProperties and return all of the asserts relationships...
        base = self.base_namespace
        # NOTE(review): the query below uses the prefixes ``rdf:`` and ``sp:``,
        # which are not keys of this mapping -- confirm before wiring it up.
        namespaces = {
            "ow": base,
            "ns1": self.rdf_namespace,
            "ev": base["Evidence"] + "/",
            "ns2": base["SimpleProperty"] + "/",
        }
        query = """
        SELECT ?DataObject ?x ?prop WHERE
        {
            ?DataObject rdf:type ow:DataObject .
            ?DataObject ?x ?DataObject_prop .
            ?DataObject_prop sp:value ?prop .
            ?Evidence ev:asserts ?Evidence_asserts .
            filter (EXISTS { ?DataObject_prop rdf:type ow:Property . })
        # object
        # asserts property pattern
        # general property pattern
        }
        """

    def triples(self, **kwargs):
        """Return an empty list; this property contributes no triples itself."""
        #XXX: All triples here are from ``asserts``
        return list()

class Evidence(DataObject):
    """
    A representation of some document which provides evidence, like scholarly
    references, for other objects.

    Possible keys include::

        pmid,pubmed: a pubmed id or url (e.g., 24098140)
        wbid,wormbase: a wormbase id or url (e.g., WBPaper00044287)
        doi: a Digital Object id or url (e.g., s00454-010-9273-0)

    Attaching evidence
    -------------------
    Attaching evidence to an object is as easy as::

        e = Evidence(author='White et al.', date='1986')
        e.asserts(Connection(pre_cell="VA11", post_cell="VD12"))
        e.save()

    But what does this series of statements mean? For us it means that White
    et al. assert that "the cells VA11 and VD12 have a connection". In
    particular, it says nothing about the neurons themselves.

    Another example::

        e = Evidence(author='Sulston et al.', date='1983')
        e.asserts(Neuron(name="AVDL").lineageName("AB alaaapalr"))
        e.save()

    This would say that Sulston et al. claimed that neuron AVDL has lineage
    AB alaaapalr.

    Now a more ambiguous example::

        e = Evidence(author='Sulston et al.', date='1983')
        e.asserts(Neuron(name="AVDL"))
        e.save()

    What might this mean? There's no clear relationship being discussed as in
    the previous examples. There are two reasonable semantics for these
    statements. They could indicate that Sulston et al. assert everything
    about the AVDL (in this case, only its name). Or they could indicate that
    Sulston et al. state the existence of AVDL. We will assume the semantics
    of the latter for *most* objects. The other intention (asserting
    everything about the object) can be expressed as::

        e = Evidence(author='Sulston et al.', date='1983')
        e.asserts_all_about(Neuron(name="AVDL"))
        e.save()

    `asserts_all_about` individually asserts each of the properties of the
    Neuron including its existence. It does not recursively assert properties
    of values set on the AVDL Neuron. If, for instance, the Neuron had a
    *complex object* as the value for its receptor types with information
    about the receptor's name, primary agonist, etc., `asserts_all_about`
    would say nothing about these. However, `asserts_all` (TODO)::

        e.asserts_all(Neuron(name="AVDL",receptor=complex_receptor_object))

    would make the aforementioned recursive statement.

    Retrieving evidence
    -------------------

    .. Not tested with the latest

    Retrieving evidence for an object is trivial as well ::

        e = Evidence()
        e.asserts(Connection(pre_cell="VA11", post_cell="VD12"))
        for x in e.load():
           print(x)

    This would print all of the evidence for the connection between VA11 and
    VD12.

    It's important to note that the considerations of recursive evidence
    assertions above do not operate for retrieval. Only evidence for the
    particular object queried (the Connection in the example above), would be
    returned and not any evidence for anything otherwise about VA11 or VD12.

    Attributes
    ----------
    asserts : ObjectProperty (value_type=DataObject)
        When used with an argument, state that this Evidence asserts that the
        relationship is true.

        Example::

            import bibtex
            bt = bibtex.parse("my.bib")
            n1 = Neuron("AVAL")
            n2 = Neuron("DA3")
            c = Connection(pre=n1,post=n2,class="synapse")
            e = Evidence(bibtex=bt['white86'])
            e.asserts(c)

        Other methods return objects which asserts accepts.

        Example::

            n1 = Neuron("AVAL")
            r = n1.neighbor("DA3")
            e = Evidence(bibtex=bt['white86'])
            e.asserts(r)

        When used without arguments, returns a sequence of statements asserted
        by this evidence

        Example::

            import bibtex
            bt = bibtex.parse("my.bib")
            n1 = Neuron("AVAL")
            n2 = Neuron("DA3")
            c = Connection(pre=n1,post=n2,class="synapse")
            e = Evidence(bibtex=bt['white86'])
            e.asserts(c)
            list(e.asserts()) # Returns a list [..., d, ...] such that d==c
    doi : DatatypeProperty
        A Digital Object Identifier (DOI) that provides evidence, optional
    pmid : DatatypeProperty
        A PubMed ID (PMID) that points to a paper that provides evidence, optional
    wormbaseid : DatatypeProperty
        An ID from WormBase that points to a record that provides evidence, optional
    author : DatatypeProperty
        The author of the evidence
    title : DatatypeProperty
        The title of the evidence
    year : DatatypeProperty
        The date (e.g., publication date) of the evidence
    uri : DatatypeProperty
        A URL that points to evidence

    Parameters
    ----------
    doi : string
        A Digital Object Identifier (DOI) that provides evidence, optional
    pmid : string
        A PubMed ID (PMID) that points to a paper that provides evidence, optional
    wormbaseid : string
        An ID from WormBase that points to a record that provides evidence, optional
    author : string
        The author of the evidence
    title : string
        The title of the evidence
    year : string or int
        The date (e.g., publication date) of the evidence
    uri : string
        A URL that points to evidence
    """
    def __init__(self, conf=False, **source):
        # The type of the evidence (a paper, a lab, a uri) is
        # determined by the `source` key
        # We keep track of a set of fields for the evidence.
        # Some of the fields are pulled from provided URIs and
        # some is provided by the user.
        #
        # Turns into a star graph
        #
        #  Evidence field1 value1
        #         ; field2 value2
        #         ; field3 value3 .
        DataObject.__init__(self, conf=conf)
        self._fields = dict()
        Evidence.ObjectProperty('asserts', multiple=True, owner=self)
        AssertsAllAbout(owner=self)
        multivalued_fields = ('author', 'uri')

        for x in multivalued_fields:
            Evidence.DatatypeProperty(x, multiple=True, owner=self)

        other_fields = ('year', 'title', 'doi', 'wbid', 'pmid')
        fields = multivalued_fields + other_fields
        for x in other_fields:
            Evidence.DatatypeProperty(x, owner=self)

        #XXX: I really don't like putting these in two places
        for k in source:
            # Identifier keys trigger a remote lookup that may fill in
            # further fields (author, year, title) before the id is stored.
            if k in ('pubmed', 'pmid'):
                self._fields['pmid'] = source[k]
                self._pubmed_extract()
                self.pmid(source[k])
            if k in ('wormbaseid','wormbase', 'wbid'):
                self._fields['wormbase'] = source[k]
                self._wormbase_extract()
                self.wbid(source[k])
            if k in ('doi',):
                self._fields['doi'] = source[k]
                self._crossref_doi_extract()
                self.doi(source[k])
            if k in ('bibtex',):
                self._fields['bibtex'] = source[k]

            # Any key naming a declared field is set directly. Note that
            # 'pmid', 'wbid' and 'doi' are therefore set a second time here.
            if k in fields:
                getattr(self,k)(source[k])

    def add_data(self, k, v):
        """ Add a field

        Parameters
        ----------
        k : string
            Field name
        v : string
            Field value
        """
        self._fields[k] = v
        dp = Evidence.DatatypeProperty(k,owner=self)
        dp(v)

    # Each 'extract' method should attempt to fill in additional fields given which ones
    # are already set as well as correct fields that are wrong
    # TODO: Provide a way to override modification of already set values.
    def _wormbase_extract(self):
        """Set author and year from the WormBase REST API for the stored
        'wormbase' id, when the response carries them."""
        #XXX: wormbase's REST API is pretty sparse in terms of data provided.
        #     Would be better off using AQL or the perl interface
        # _Very_ few of these have these fields filled in
        wbid = self._fields['wormbase']

        def wbRequest(ident,field):
            # Use the id that was passed in rather than the enclosing variable
            return _json_request("http://api.wormbase.org/rest/widget/paper/"+ident+"/"+field)

        # get the author
        j = wbRequest(wbid, 'authors')
        if 'fields' in j:
            f = j['fields']
            if 'data' in f:
                self.author([x['label'] for x in f['data']])
            elif 'name' in f:
                self.author(f['name']['data']['label'])

        # get the publication date
        j = wbRequest(wbid, 'publication_date')
        if 'fields' in j:
            f = j['fields']
            if 'data' in f:
                self.year(f['data']['label'])
            elif 'name' in f:
                self.year(f['name']['data']['label'])

    def _crossref_doi_extract(self):
        """Set author, title and year from the first Crossref search result
        for the stored 'doi', when the response carries them."""
        # Extract data from crossref
        def crRequest(doi):
            try:
                from urllib.parse import urlencode  # Python 3
            except ImportError:
                from urllib import urlencode  # Python 2
            data = {'q': doi}
            data_encoded = urlencode(data)
            return _json_request('http://search.labs.crossref.org/dois?%s' % data_encoded)

        doi = self._fields['doi']
        if doi[:4] == 'http':
            doi = _doi_uri_to_doi(doi)
        r = crRequest(doi)
        # no error for bad ids, just an empty list; a failed request
        # yields an empty dict from _json_request
        if not isinstance(r, list) or not r:
            return
        # Crossref can process multiple doi's at one go and return the metadata.
        # we just need the first one
        first = r[0]

        #XXX: I don't think coins is meant to be used, but it has structured data...
        for pair in first.get('coins', '').split('&'):
            parts = [y.replace('+', ' ').strip() for y in pair.split("=")]
            # Skip entries that have a key but no "=value" part
            if len(parts) > 1 and parts[0] == 'rft.au':
                self.author(parts[1])

        if 'title' in first:
            self.title(first['title'])
        if 'year' in first:
            self.year(first['year'])

    def _pubmed_extract(self):
        """Set authors from the PubMed esummary record for the stored
        'pmid', which may be an id (int or string) or a PubMed URI."""
        def pmRequest(pmid):
            import xml.etree.ElementTree as ET # Python 2.5 and up
            base = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/'
            # XXX: There's more data in esummary.fcgi?, but I don't know how to parse it
            url = base + "esummary.fcgi?db=pubmed&id=%d" % pmid
            response = _url_request(url)
            if not hasattr(response, 'read'):
                # _url_request returns "" when the request fails; there is
                # nothing to parse in that case
                return None
            return ET.parse(response)

        pmid = self._fields['pmid']
        # The id may be given as an int, which cannot be sliced
        if str(pmid)[:4] == 'http':
            # Probably a uri, right?
            pmid = _pubmed_uri_to_pmid(pmid)
        pmid = int(pmid)

        tree = pmRequest(pmid)
        if tree is None:
            return
        for x in tree.findall('./DocSum/Item[@Name="AuthorList"]/Item'):
            self.author(x.text)