Skip to content

Commit

Permalink
Add option to automatically rename table attributes when matching genes with match_table_attributes
Browse files Browse the repository at this point in the history
  • Loading branch information
JakaKokosar committed Oct 1, 2020
1 parent 02fd03c commit 679f264
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 16 deletions.
36 changes: 23 additions & 13 deletions orangecontrib/bioinformatics/ncbi/gene/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def load_attributes(self, values: Tuple[str, ...], attributes: Tuple[str, ...] =
setattr(self, attr, json.loads(val) if attr in ('synonyms', 'db_refs', 'homologs') else val)

def homolog_gene(self, taxonomy_id: str) -> Optional[str]:
""" Returns gene homolog for given organism.
"""Returns gene homolog for given organism.
Parameters
----------
Expand Down Expand Up @@ -95,7 +95,7 @@ def genes(self, genes: List[str]) -> None:
self._match()

def get_known_genes(self) -> List[Gene]:
""" Return Genes with known Entrez ID
"""Return Genes with known Entrez ID
Returns
-------
Expand All @@ -106,7 +106,7 @@ def get_known_genes(self) -> List[Gene]:
return [gene for gene in self.genes if gene.gene_id]

def to_data_table(self, selected_genes: Optional[List[str]] = None) -> Table:
""" Transform GeneMatcher results to Orange data table.
"""Transform GeneMatcher results to Orange data table.
Optionally we can provide a list of genes (Entrez Ids).
The table on the output will be populated only with provided genes.
Expand Down Expand Up @@ -184,7 +184,7 @@ def to_data_table(self, selected_genes: Optional[List[str]] = None) -> Table:
def match_table_column(
self, data_table: Table, column_name: str, target_column: Optional[StringVariable] = None
) -> Table:
""" Helper function for gene name matching with :class:`Orange.data.Table`.
"""Helper function for gene name matching with :class:`Orange.data.Table`.
Given a column of genes, GeneMatcher will try to map genes to their
corresponding Entrez Ids.
Expand Down Expand Up @@ -223,8 +223,8 @@ def match_table_column(

return new_data

def match_table_attributes(self, data_table):
""" Helper function for gene name matching with :class:`Orange.data.Table`.
def match_table_attributes(self, data_table, rename=False, source_name='Source ID') -> Table:
"""Helper function for gene name matching with :class:`Orange.data.Table`.
Match table attributes and if a unique match is found create a new column attribute
for Entrez Id. Attribute name is defined here: `orangecontrib.bioinformatics.ncbi.gene.config.NCBI_ID`
Expand All @@ -237,18 +237,28 @@ def match_table_attributes(self, data_table):
Returns
-------
:class:`Orange.data.Table`
Data table column attributes are populated with Entrez Ids
"""
input_gene_names = [var.name for var in data_table.domain.attributes]

if input_gene_names:
self.genes = input_gene_names
# run gene matcher
self.genes = [var.name for var in data_table.domain.attributes]

def helper(gene, attribute):
if gene.gene_id:
if rename:
attribute = attribute.renamed(gene.symbol)
attribute.attributes[source_name] = gene.input_identifier

attribute.attributes[ENTREZ_ID] = gene.gene_id
return attribute

attributes = [helper(gene, attr) for gene, attr in zip(self.genes, data_table.domain.attributes)]
domain = Domain(attributes, data_table.domain.class_vars, data_table.domain.metas)

for gene in self.genes:
if gene.gene_id:
data_table.domain[gene.input_identifier].attributes[ENTREZ_ID] = gene.gene_id
return data_table.transform(domain)

def match_genes(self):
self._match()
Expand Down Expand Up @@ -299,7 +309,7 @@ def _match(self):

class GeneInfo(dict):
def __init__(self, tax_id: str):
""" Loads genes for given organism in a dict.
"""Loads genes for given organism in a dict.
Each instance of :class:`Gene` is mapped to corresponding Entrez ID
Expand Down
3 changes: 2 additions & 1 deletion orangecontrib/bioinformatics/tests/ncbi/test_gene.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,11 @@ def test_match_table_attributes(self):

data = Table('brown-selected.tab')
data = Table.transpose(data, feature_names_column='gene')
gm.match_table_attributes(data)
data = gm.match_table_attributes(data, rename=True, source_name='FooBar')

for column in data.domain.attributes:
self.assertTrue(ENTREZ_ID in column.attributes)
self.assertTrue('FooBar' in column.attributes)


class TestGeneInfo(unittest.TestCase):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -668,7 +668,7 @@ def set_progress():
state.set_status('Matching genes ...')
tax_id = species_name_to_taxid(species)
gm = GeneMatcher(tax_id)
gm.match_table_attributes(table)
table = gm.match_table_attributes(table, rename=True)
table.attributes[TableAnnotation.tax_id] = tax_id
table.attributes[TableAnnotation.gene_as_attr_name] = True
table.attributes[TableAnnotation.gene_id_attribute] = 'Entrez ID'
Expand Down
2 changes: 1 addition & 1 deletion orangecontrib/bioinformatics/widgets/OWdictyExpress.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ def send_to_output(self, result):
data = gene_matcher.match_table_column(data, 'Gene', StringVariable(ENTREZ_ID))
data.attributes[GENE_ID_COLUMN] = ENTREZ_ID
else:
gene_matcher.match_table_attributes(data)
data = gene_matcher.match_table_attributes(data)
data.attributes[GENE_ID_ATTRIBUTE] = ENTREZ_ID

# add table attributes
Expand Down

0 comments on commit 679f264

Please sign in to comment.