# Source code for velocyto.transcript_model

from typing import *
import velocyto as vcy


class TranscriptModel:
    """A simple object representing a transcript model as a list of `vcy.Feature` objects.

    Features are stored in ``list_features`` in the order they are appended with
    `append_exon`, which inserts an intron feature between consecutive exons.

    Arguments
    ---------
    trid: str
        Transcript identifier.
    trname: str
        Transcript name.
    geneid: str
        Gene identifier.
    genename: str
        Gene name.
    chromstrand: str
        String whose last character is the strand sign; ``"+"`` is treated as the
        forward strand and anything else as the reverse strand.
    """
    __slots__ = ["trid", "trname", "geneid", "genename", "chromstrand", "list_features"]

    def __init__(self, trid: str, trname: str, geneid: str, genename: str, chromstrand: str) -> None:
        self.trid = trid
        self.trname = trname
        self.geneid = geneid
        self.genename = genename
        self.chromstrand = chromstrand
        # Exons and introns in the order in which they were appended
        self.list_features: List[vcy.Feature] = []

    def __iter__(self) -> Iterator[vcy.Feature]:
        """Iterate over the features of the model in list order."""
        yield from self.list_features

    def __lt__(self, other: Any) -> bool:
        """Order models by the start of their first feature.

        Only defined when both models share the same ``chromstrand``; the check
        is an ``assert`` and is therefore skipped when Python runs with ``-O``.
        """
        assert self.chromstrand == other.chromstrand, "`<`(.__lt__) not implemented for different chromosomes"
        return self.list_features[0].start < other.list_features[0].start

    def __gt__(self, other: Any) -> bool:
        """Order models by the start of their first feature.

        Only defined when both models share the same ``chromstrand``; the check
        is an ``assert`` and is therefore skipped when Python runs with ``-O``.
        """
        assert self.chromstrand == other.chromstrand, "`>` (.__gt__) not implemented for different chromosomes"
        return self.list_features[0].start > other.list_features[0].start

    @property
    def start(self) -> int:
        """Start of the first feature in ``list_features``.

        NOTE: This should be accessed only after the creation of the transcript model is finished
        (i.e.) after append_exon has been called to add all the exons/introns.
        Raises IndexError if no feature has been appended yet.
        """
        return self.list_features[0].start

    @property
    def end(self) -> int:
        """End of the last feature in ``list_features``.

        NOTE: This should be accessed only after the creation of the transcript model is finished
        (i.e.) after append_exon has been called to add all the exons/introns.
        Raises IndexError if no feature has been appended yet.
        """
        return self.list_features[-1].end

    def ends_upstream_of(self, read: vcy.Read) -> bool:
        """Return True if the last feature of the model ends before ``read.pos``."""
        # one could consider to add TOLERANCE
        # note that ``self.list_features[-1]`` is the last exon if strand + and first exon for strand -
        return self.list_features[-1].end < read.pos

    def intersects(self, segment: Tuple[int, int], minimum_flanking: int=vcy.MIN_FLANK) -> bool:
        """Return True if ``segment`` overlaps the model by more than ``minimum_flanking``.

        Arguments
        ---------
        segment: Tuple[int, int]
            The interval to test; its first and last elements are used as the two ends.
        minimum_flanking: int, default=vcy.MIN_FLANK
            Margin by which each end of the segment has to reach inside the
            ``start``-``end`` span of the model.
        """
        # NOTE: the length of the segment itself is not checked against minimum_flanking
        reaches_past_start = segment[-1] - minimum_flanking > self.start
        return reaches_past_start and (segment[0] + minimum_flanking < self.end)

    def append_exon(self, exon_feature: vcy.Feature) -> None:
        """Append an exon and create an intron when needed

        Arguments
        ---------
        exon_feature: vcy.Feature
            A feature object representing an exon to add to the transcript model.
        """
        exon_feature.transcript_model = self
        if self.list_features:
            # Some exon already existed: fill the gap between it and the new exon with an intron
            previous = self.list_features[-1]
            if self.chromstrand[-1] == "+":
                intron_number = previous.exin_no
            else:
                # the intron takes the number of the preceding feature minus one on the other strand
                intron_number = previous.exin_no - 1
            self.list_features.append(vcy.Feature(start=previous.end + 1,
                                                  end=exon_feature.start - 1,
                                                  kind=ord("i"),
                                                  exin_no=intron_number,
                                                  transcript_model=self))
        # first/last exon when the list was empty, otherwise the exon following the new intron
        self.list_features.append(exon_feature)

    def chop_if_long_intron(self, maxlen: int=vcy.LONGEST_INTRON_ALLOWED) -> None:
        """Modify a transcript model chopping the 5' region upstream of a very long intron,
        to avoid that an extremely long intron masks the counting of internal genes

        Arguments
        ---------
        maxlen: int, default=vcy.LONGEST_INTRON_ALLOWED
            transcript models that contain one or more intronic intervals of len > maxlen will be chopped

        Returns
        -------
        Nothing. When a long intron is found it calls `_remove_upstream_of` (strand "+", using the
        last long intron in list order) or `_remove_downstream_of` (otherwise, using the first long
        intron in list order) on the transcript model, and the model is renamed by appending
        `_mod` to both trid and trname
        """
        long_feats = [i for i in self.list_features if len(i) > maxlen and i.kind == ord("i")]
        if not long_feats:
            return
        if self.chromstrand[-1] == "+":
            self._remove_upstream_of(long_feats[-1])
        else:  # self.chromstrand[-1] == "-"
            self._remove_downstream_of(long_feats[0])
        self.trid = self.trid + "_mod"
        self.trname = self.trname + "_mod"

    @staticmethod
    def _renumber_features(features: Iterable[vcy.Feature]) -> List[vcy.Feature]:
        """Renumber exons and introns from 1, in iteration order, and return them as a list.

        Exons and introns are counted independently; features of any other kind are dropped.
        """
        exon_kind, intron_kind = ord("e"), ord("i")
        n_exons = 0
        n_introns = 0
        kept: List[vcy.Feature] = []
        for feat in features:
            if feat.kind == exon_kind:
                n_exons += 1
                feat.exin_no = n_exons
            elif feat.kind == intron_kind:
                n_introns += 1
                feat.exin_no = n_introns
            else:
                continue
            kept.append(feat)
        return kept

    def _remove_upstream_of(self, longest_feat: vcy.Feature) -> None:
        """Keep only the features that compare greater than ``longest_feat``, renumbered from 1."""
        self.list_features = self._renumber_features(
            feat for feat in self.list_features if feat > longest_feat)

    def _remove_downstream_of(self, longest_feat: vcy.Feature) -> None:
        """Keep only the features that compare less than ``longest_feat``.

        Numbering restarts from 1 at the end of the list and grows towards its
        beginning; the order of the list itself is preserved.
        """
        kept = self._renumber_features(
            feat for feat in reversed(self.list_features) if feat < longest_feat)
        kept.reverse()  # numbering ran back to front, restore the original list order
        self.list_features = kept

    def __repr__(self) -> str:
        """Return the transcript id followed by a compact ``<kind><number>`` chain of its features."""
        list_feats = '-'.join(f"{chr(i.kind)}{i.exin_no}" for i in self.list_features)
        return f"<TrMod {self.trid}\t{list_feats}\tat {hex(id(self))}>"