Source code for velocyto.feature

from typing import *
from collections import defaultdict
import logging
import velocyto as vcy


[docs]class Feature:
    """A simple class representing an annotated genomic feature (e.g. exon, intron, masked repeat)"""
    __slots__ = ["start", "end", "kind", "exin_no", "is_validated", "transcript_model"]
    
    def __init__(self, start: int, end: int, kind: int, exin_no: str, transcript_model: Any=None) -> None:
        self.start = start
        self.end = end
        self.transcript_model = transcript_model
        self.kind = kind  # it should be ord("e"), ord("i"), ord("m"), ....
        self.exin_no = int(exin_no)
        self.is_validated = False
    
    def __lt__(self, other: Any) -> bool:
        if self.start == other.start:
            return self.end < other.end
        return self.start < other.start

    def __gt__(self, other: Any) -> bool:
        if self.start == other.start:
            return self.end > other.end
        return self.start > other.start

    def __len__(self) -> int:
        return (self.end - self.start) + 1
        
    def __repr__(self) -> str:
        if self.transcript_model is None:
            return f"Feature not linked to Transcript Model: {self.start}-{self.end} {chr(self.kind)}{self.exin_no}"
        return f"Feature: chr{self.transcript_model.chromstrand}:{self.start}-{self.end} {self.transcript_model.trname}\
    ({self.transcript_model.trid}) {chr(self.kind)}{self.exin_no} {self.transcript_model.genename}({self.transcript_model.geneid})"

    @property
    def is_last_3prime(self) -> bool:
        if self.transcript_model.chromstrand[-1] == "+":
            return self == self.transcript_model.list_features[-1]
        else:
            return self == self.transcript_model.list_features[0]

[docs]    def get_downstream_exon(self) -> Any:
        """To use only for introns. Returns the vcy.Feature corresponding to the neighbour exon downstream

        Note
        ----
        In a 15 exons transcript model:
        Downstream to intron10 is exon11 or the interval with index `20` if strand "+".
        Downtream to intron10 is exon10 or the interval with index `10` if strand "-"
        """
        if self.transcript_model.chromstrand[-1] == "+":
            ix = self.exin_no * 2
        else:
            # in the case on strand -
            ix = len(self.transcript_model.list_features) - 2 * self.exin_no + 1
        return self.transcript_model.list_features[ix]

[docs]    def get_upstream_exon(self) -> Any:
        """To use only for introns. Returns the vcy.Feature corresponding to the neighbour exon downstream

        Note
        ----
        In a 15 exons transcript model:
        Upstream to intron10 is exon9 or the interval with inxex `18` if strand "+".
        Upstream to intron10 is exon11 or the interval with inxex `8` if strand "-"
        """
        if self.transcript_model.chromstrand[-1] == "+":
            ix = (self.exin_no * 2) - 2
        else:
            # in the case on strand -
            ix = len(self.transcript_model.list_features) - 2 * self.exin_no - 1
        return self.transcript_model.list_features[ix]

    # if self.chromstrand[-1] == "+":
    #             intron_number = self.list_features[-1].exin_no
    #         else:
    #             intron_number = self.list_features[-1].exin_no - 1

[docs]    def ends_upstream_of(self, read: vcy.Read) -> bool:
        """The following situation happens
                                                            Read
                                               *|||segment|||-?-||segment|||????????
                ???????|||||Ivl|||||||||*

        """
        return self.end < read.pos  # NOTE: pos is diffetent from start, consider chagning

[docs]    def doesnt_start_after(self, segment: Tuple[int, int]) -> bool:
        """One of the following situation happens

                            *||||||segment|||||????????
            *||||Ivl|||||*
                *|||||||||||||Ivl||||||||||????????????
                                    *|||||||||||||Ivl||||||||||????????????
                                              *|||||||||||||Ivl||||||||||????????????

        """
        return self.start < segment[-1]

[docs]    def intersects(self, segment: Tuple[int, int], minimum_flanking: int=vcy.MIN_FLANK) -> bool:
        return (segment[-1] - minimum_flanking > self.start) and\
               (segment[0] + minimum_flanking < self.end)  # and ((segment[-1] - segment[0]) > minimum_flanking)

[docs]    def contains(self, segment: Tuple[int, int], minimum_flanking: int=vcy.MIN_FLANK) -> bool:
        """One of following situation happens

            *-----||||||segment|||||-----*
                *|||||||||||||Ivl||||||||||||||||*

                  *-----||||||segment|||||-----*
                *|||||||||||||Ivl||||||||||||||||*

                      *-----||||||segment|||||-----*
                *|||||||||||||Ivl||||||||||||||||*

        where `---` idicates the minimum flanking
        """
        return (segment[0] + minimum_flanking >= self.start) and (segment[-1] - minimum_flanking <= self.end) and ((segment[-1] - segment[0]) > minimum_flanking)

[docs]    def start_overlaps_with_part_of(self, segment: Tuple[int, int], minimum_flanking: int=vcy.MIN_FLANK) -> bool:
        """The following situation happens

          *---|||segment||---*
                *|||||||||||||Ivl||||||||||||||||*

        where `---` idicates the minimum flanking

        """
        return (segment[0] + minimum_flanking < self.start) and (segment[-1] - minimum_flanking > self.start)

[docs]    def end_overlaps_with_part_of(self, segment: Tuple[int, int], minimum_flanking: int=vcy.MIN_FLANK) -> bool:
        """The following situation happens

                                      *---|||segment||---*
                *|||||||||||||Ivl||||||||||||||||*

        where `---` idicates the minimum flanking
            
        """
        return (segment[0] + minimum_flanking < self.end) and (segment[-1] - minimum_flanking > self.end)