Source code for trifusion.process.data

#!/usr/bin/env python2
# -*- coding: utf-8 -*-
#  
#  Copyright 2012 Unknown <diogo@arch>
#  
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#  
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#  
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
#  MA 02110-1301, USA.

import re
from os.path import basename, splitext, join
from os import sep
from collections import OrderedDict


class PartitionException(Exception):
    """Error raised when a partition definition or operation is invalid.

    Parameters
    ----------
    value : object
        Payload describing the problem (usually a message string). It is
        kept in the `value` attribute and `str()` of the exception returns
        its `repr`.
    """

    def __init__(self, value):
        # Hand the payload to Exception as well. On Python 2 the base
        # class only fills `args` from __init__, so skipping this call
        # leaves `args` empty and breaks pickling/copying of the exception.
        super(PartitionException, self).__init__(value)
        self.value = value

    def __str__(self):
        return repr(self.value)


class InvalidPartitionFile(Exception):
    """Error describing a badly formatted partition file or range.

    Parameters
    ----------
    value : object
        Payload describing the problem (usually a message string). It is
        kept in the `value` attribute and `str()` of the exception returns
        its `repr`.
    """

    def __init__(self, value):
        # Hand the payload to Exception as well. On Python 2 the base
        # class only fills `args` from __init__, so skipping this call
        # leaves `args` empty and breaks pickling/copying of the exception.
        super(InvalidPartitionFile, self).__init__(value)
        self.value = value

    def __str__(self):
        return repr(self.value)


[docs]class Partitions(object):
    """Alignment partitions interface for `Alignment` and `AlignmentList`.

    The Partitions class is used to define partitions for `Alignment`
    and `AlignmentList` objects and associate substitution models to
    each partition. After instantiating, partitions may be set in two ways:

      - Partition files: Being Nexus charset blocks and RAxML partition files
        currently supported
      - Tuple-like objects: Containing the ranges and names of the partitions

    Attributes
    ----------
    partition_length : int
        Length of the total partitions.
    partitions : OrderedDict
        Storage of partition names (key) and their range (values).
    partitions_index : list
        The index (starting point) for each partition, including codon
        partitions.
    partitions_alignments : OrderedDict
        Storage of the partition names (key) and their corresponding
        alignment files (values).
    alignments_range : OrderedDict
        Storage of the alignment names (key) and their range (values).
    models : OrderedDict
        Storage of partition names (key) and their models (values).
    merged_files : dict
        Storage of the original range (values) of every alignment file (key).
    counter : int
        Indicator of where the last partition ended.
    partition_format : str
        Format of the original partition file, if any.
    """

    # =========================================================================
    #   Substitution models
    # =========================================================================
    # Models are grouped by target program. For MrBayes, each key of
    # _models["mrbayes"] is the name of a substitution model (usually in
    # capital letters) and each value is the list of instructions needed to
    # specify that model; every element of the list corresponds to one line.
    _models = {
        "mrbayes": {
            "GTR": ["nst=6", "statefreqpr=dirichlet(1,1,1,1)"],
            "SYM": ["nst=6", "statefreqpr=fixed(equal)"],
            "HKY": ["nst=2", "statefreqpr=dirichlet(1,1,1,1)"],
            "K2P": ["nst=2", "statefreqpr=fixed(equal)"],
            "F81": ["nst=1", "statefreqpr=dirichlet(1,1,1,1)"],
            "JC": ["nst=1", "statefreqpr=fixed(equal)"],
        },
    }

    def __init__(self):

        self.partition_length = 0
        """
        The length of the locus may be necessary when partitions are defined
        in the input files using the "." notation, meaning the entire locus.
        Therefore, to convert this notation into workable integers, the size
        of the locus must be provided using the set_length method.
        """

        self.partitions = OrderedDict()
        """
        partitions will contain the name and range of the partitions for a given
        alignment object. Both gene and codon partitions will be stored in this
        attribute, but gene partitions are the main entries. An example of
        different stored partitions is::

            partitions = {"partitionA": ((0, 856), False),
                          "partitionB": ((857, 1450), [857,858,859] }

        "partitionA" is a simple gene partition ranging from 0 to 856, while
        "partitionB" is an assembly of codon partitions. The third element of
        the tuple is destined to codon partitions. If there are none, it should
        be False. If there are codon partitions, a list should be provided with
        the desired initial codons. In the example above, "partitionB" has
        actually 3 partitions starting at the first, second and third sequence
        nucleotide of the main partition.
        """

        self.partitions_index = []
        """
        partitions_index will remember the index of all added partitions. This
        attribute was created because codon models are added to the same parent
        partitions, thus losing their actual index. This is important for
        Nexus files, where models are applied to the index of the partition.
        This will simply store the partition names, which can be accessed using
        their index, or searched to return their index. To better support codon
        partitions, each entry in the partitions_index will consist in a list,
        in which the first element is the partition name, and the second element
        is the index of the subpartition. An example would be::

            partitions_index = [["partA", 0], ["partA", 1], ["partA", 2],
                                ["partB", 0]]

        in which, partA has 3 codon partitions, and partB has only one partition

        """

        self.partitions_alignments = OrderedDict()
        """
        The partitions_alignments attribute will associate the partition with
        the corresponding alignment files. For single alignment partitions,
        this will provide information on the file name. For multiple 
        alignments, besides the information of the file names, it will 
        associate which alignments are contained in a given partition and 
        support multi alignment partitions. An example would be::

            partitions_alignments = {"PartitionA": ["FileA.fas"],
                                     "PartitionB": ["FileB.fas", "FileC.fas"]}

        """

        self.alignments_range = OrderedDict()
        """
        """

        self.models = OrderedDict()
        """
        The self.models attribute will contain the same key list as
        self.partitions and will associate the substitution models to each
        partitions. For each partition, the format should be as follows::

            models["partA"] = [[[..model_params..]],[..model_names..],
                               ["12", "3"]]

        The first element is a list that may contain the substitution model
        parameters for up to three subpartitions, the second element is also
        a list with the corresponding names of the substitution models and
        the third list will store any links between models
        """

        self.merged_files = {}
        """
        This attribute will keep a record of the original ranges of every file
        that was merged. This is useful to split partitions according to files
        or to undo any changes. Each entry should be::

            {"alignment_file1": (0, 1234), "alignment_file2": (3444, 6291)}
        """

        self.counter = 0
        """
        The counter attribute will be used as an indication of where the last
        partition ends when one or more partitions are added
        """

        self.partition_format = None

    def __iter__(self):
        """Iterator behavior for `Partitions`.

        The class iterator will iterate over a list containing the partition
        names and a modified version of their ranges that is compatible with
        other software (unlike the 0 offset of python)

        Returns
        _ : iter
            Iterator of `partitions.items()`.
        """

        return iter(self.partitions.items())

[docs]    def reset(self, keep_alignments_range=False):
        """Clears partitions and attributes

        Clears partitions and resets object to __init__ state. The original
        alignment range can be retained by setting the `keep_alignments_range`
        argument to True.

        Parameters
        ----------
        keep_alignments_range : bool
            If True, the `alignments_range` attribute will not be reset.
        """

        self.partitions = OrderedDict()
        self.partitions_index = []
        self.partitions_alignments = OrderedDict()
        self.models = OrderedDict()
        self.counter = 0
        if not keep_alignments_range:
            self.alignments_range = OrderedDict()

[docs]    def iter_files(self):
        """Iterates over `partitions_alignments.items()`.

        Returns
        -------
        _ : iter
            Iterator of `partitions_alignments.items()`.
        """

        return iter(self.partitions_alignments.items())

[docs]    def set_length(self, length):
        """Set total length of current locus (over all partitions).

        Sets the length of the locus. This may be important to convert certain
        partition defining nomenclature, such as using the "." to indicate
        whole length of the alignment

        Parameters
        ----------
        length : int
            Integer that will be set as `partition_length`.
        """

        self.partition_length = length

    #===========================================================================
    # Parsers
    #===========================================================================

    @staticmethod
    def _get_file_format(partition_file):
        """Guesses the format of the partition file (Nexus or RAxML's).

        Returns
        -------
        partition_format : str
            Format of the partition file ("nexus" or "raxml").
        """

        file_handle = open(partition_file)

        # Skips first empty lines, if any
        header = file_handle.readline()
        while header.startswith("\n"):
            header = next(file_handle)

        fields = header.split()
        if fields[0].lower() == "charset":
            partition_format = "nexus"
        else:
            partition_format = "raxml"

        return partition_format

[docs]    def read_from_file(self, partitions_file):
        """Parses partitions from file

        This method parses a file containing partitions. It supports
        partitions files similar to RAxML's and NEXUS charset blocks. The
        NEXUS file, however, must only contain the charset block. The
        model_nexus argument provides a namespace for the model variable in
        the nexus format, since this information is not present in the file.
        However, it assures consistency on the Partition object.

        Parameters
        ----------
        partitions_file : str
            Path to partitions file.

        Raises
        ------
        PartitionException
            When one partition definition cannot be parsed.
        """

        # Resets previous partitions (except alignments_range)
        self.reset(keep_alignments_range=True)

        # Get the format of the partition file
        self.partition_format = self._get_file_format(partitions_file)

        part_file = open(partitions_file)

        # In order to support unsorted partition ranges, the complete
        # partition set will be stored temporary in memory. Even very large
        # partition files should result in relatively small data structures.
        # Once this variable is populated, it will be sorted according to the
        # first element of the range.
        temp_ranges = []

        # TODO: Add support for codon partitions in raxml format
        if self.partition_format == "raxml":
            for p, line in enumerate(part_file):

                # Ignore empty lines
                if line.strip() == "":
                    continue

                # A wrongly formatted raxml partition file may be provided, in
                # which case an IndexError exception will be raised. This will
                # handle that exception
                try:
                    fields = line.split(",")
                    # Get partition name as string
                    partition_name = fields[1].split("=")[0].strip()
                    # Get partition range as list of int
                    partition_range_temp = fields[1].split("=")[1]
                    try:
                        partition_range = [
                            int(x) - 1 for x in
                            partition_range_temp.strip().split("-")]

                    except ValueError as e:
                        # A ValueError may be raise when there is a "."
                        # notation in the partition range. If so, convert
                        # the "." to the sequence lenght. If no sequence lenght
                        # has been provided raise another exception
                        pr = partition_range_temp.strip().split("-")
                        if pr[1] == ".":
                            if self.partition_length:
                                partition_range = [int(pr[0]) - 1,
                                                   self.partition_length - 1]
                            else:
                                raise PartitionException(
                                    "The length of the locus must be "
                                    "provided when partitions are "
                                    "defined using '.' notation to "
                                    "mean full length")
                        else:
                            raise e

                    # Check which alignment file contains the current partition
                    if self.alignments_range:
                        try:
                            file_name = \
                                [x for x, y in self.alignments_range.items() if
                                 y[0] <= partition_range[0] < y[1]][0]
                        except IndexError:
                            file_name = None
                    else:
                        file_name = None

                    temp_ranges.append([partition_name, file_name,
                                        partition_range])

                except (IndexError, ValueError):
                    return InvalidPartitionFile(
                        "Badly formatted partitions file in line {} "
                        "with:\n\n{}".format(p + 1, line))

        elif self.partition_format == "nexus":
            for line in part_file:
                # Ignore empty lines
                if line.strip() != "":
                    try:
                        res = self.read_from_nexus_string(line,
                                                          return_res=True)
                    except PartitionException as e:
                        return e
                    if res:
                        temp_ranges.append(res)

        # Sort partition ranges according to the first element of the range
        temp_ranges.sort(key=lambda part: part[2][0])

        for name, file_name, part_range in temp_ranges:
            # Add information to partitions storage
            try:
                self.add_partition(name,
                                   locus_range=part_range,
                                   file_name=file_name)
            except InvalidPartitionFile as e:
                return e

[docs]    def read_from_nexus_string(self, nx_string, file_name=None,
                               return_res=False):
        """Parses a single nexus string with partition definition.

        Parameters
        ----------
        nx_string : str
            String with partition definition
        file_name : str, optional
            String with name of the file corresponding to the partition.
        return_res : bool
            If True, it will only return the parsed partition information.
            If False, it will add the parsed partition to the `Partitions`
            object.
        """

        try:
            fields = nx_string.split("=")
            partition_name = fields[0].split()[1].strip()

            # If this list has 2 elements, it should be a simple gene partition
            # If it has 3 elements, it should be a codon partition
            partition_full = re.split(r"[-\\]", fields[1].strip().
                                      replace(";", "").replace("/", "\\"))

            # If partition is defined using "." notation to mean full length
            if partition_full[1] == ".":
                if self.partition_length:
                    partition_range = [int(partition_full[0]) - 1,
                                       self.partition_length - 1]
                else:
                    raise PartitionException("The length of the locus must be "
                                             "provided when partitions are "
                                             "defined using '.' notation to "
                                             "mean full length")
            else:
                partition_range = [int(partition_full[0]) - 1,
                                   int(partition_full[1]) - 1]

            # Check which alignment file contains the current partition
            if self.alignments_range:
                try:
                    file_name = [x for x, y in self.alignments_range.items() if
                                 partition_range[0] in xrange(*y) and
                                 partition_range[1] in xrange(*y)][0]
                except IndexError:
                    pass

            if return_res:
                return [partition_name, file_name, partition_range]
            else:
                self.add_partition(partition_name, locus_range=partition_range,
                                   file_name=file_name)
        # If, for some reason, the current line cannot be interpreted as a
        # charset line, ignore it.
        except IndexError:
            if return_res:
                return None
            else:
                pass

[docs]    def get_partition_names(self):
        """Returns a list with the name of the partitions

        Returns
        -------
        names : list
            List with names of the partitions. When a parent
            partition has multiple codon partitions, it returns a partition
            name for every codon starting position present.
        """

        names = []

        for part, vals in self.partitions.items():
            if vals[1]:
                names.extend([part + "_%s" % (x + 1) for x in vals[1]])
            else:
                names.append(part)

        return names

[docs]    def read_from_dict(self, dict_obj):
        """Reads partition information from a dict object

        Parses partitions defined and stored in a special OrderedDict. The
        values of dict_obj should be the partition names and their
        corresponding values should contain the loci range and substitution
        model, if any.

        Parameters
        ----------
        dict_obj : OrderedDict
            Ordered dictionary with the definition of the partitions

        Examples
        --------
        Here is an example of a `dict_obj`::

            dict_obj = OrderedDict(("GeneA", [(0,234), "GTR"]),
                                   ("GeneB", [(235, 865), "JC"))
        """

        for k, v in dict_obj:
            # Determining if value contains only the range or the substitution
            # model as well
            if len(v) > 1:
                self.add_partition(k, locus_range=v[0])
            else:
                self.add_partition(k, locus_range=v[0])

[docs]    def is_single(self):
        """Returns whether the current `Partitions` has single or multiple
        partitions.

        Returns
        -------
            _ : bool
            Returns True is there is only a single partition defined,
            and False if there are multiple partitions.
        """

        if len(self.partitions) == 1:
            if not [x for x in self.partitions.values()][0][1]:
                return True
            else:
                return False
        else:
            return False

    def _find_parent(self, max_range):
        """Finds the parent partition from a specified range.

        Finds a parent partition of a codon partition.

        Parameters
        ----------
        max_range : int
            The maximum range of the codon partition.

        Returns
        -------
        part : str
            The name of the parent partition, from the `partitions` attribute.
        """

        for part, vals in self.partitions.items():
            lrange = vals[0]
            if lrange[1] == max_range:
                return part

[docs]    def add_partition(self, name, length=None, locus_range=None, codon=False,
                      use_counter=False, file_name=None, model_cls=None,
                      auto_correct_name=True):
        """Adds a new partition.

        Adds a new partition providing the length or the range of current
        alignment. If both are provided, the length takes precedence.The range
        of the partition should be in python index, that is, the first position
        should be 0 and not 1.

        Parameters
        ----------
        name : str
            Name of the partition.
        length : int, optional
            Length of the alignment.
        locus_range : list or tuple, optional
            Range of the partition.
        codon : list
            If the codon partitions are already defined, provide the
            starting points in list format, e.g: [1,2,3].
        use_counter : bool
            If True, `locus_range` will be updated according to the `counter`
            attribute.
        file_name : str
            Name of the alignment file.
        model_cls :
            Specified the substitution model that will be set in `models`.
        auto_correct_name : bool
            If set to True, when a partition name already exist, add a counter
            to the end of the name.

        Notes
        -----
        IMPORTANT NOTE on self.model: The self.model attribute was designed
        in a way that allows the storage of different substitution models
        inside the same partition name. This is useful for codon partitions that
        share the same parent partition name. So, for example, a parent
        partition named "PartA" with 3 codon partitions can have a different
        model for each one like this::

            self.models["PartA"] = [[[..model1_params..], [..model2_params..],
                [..model3_params..]], [GTR, GTR, GTR], ["1", "2", "3"]]

        """

        # Check for duplicate names in partitions
        if name in self.partitions:
            if auto_correct_name:
                c = 1
                while "{}_{}".format(name, c) in self.partitions:
                    c += 1
            else:
                raise PartitionException("Partition name %s is already in "
                                         "partition table" % name)

        # When length is provided
        if length:
            # Add to or update alignments_range attribute. This will store the
            # original range of the alignment
            if file_name:
                if file_name in self.alignments_range:
                    current_range = [self.counter, self.counter + (length - 1)]
                    # If start position is earlier than before, update
                    if current_range[0] < self.alignments_range[file_name][0]:
                        self.alignments_range[file_name][0] = current_range[0]
                    # If stop position if later than before, update
                    if current_range[1] > self.alignments_range[file_name][1]:
                        self.alignments_range[file_name][1] = current_range[1]
                else:
                    self.alignments_range[file_name] = [
                        self.counter, self.counter + (length - 1)]

            # Add partition to index list
            self.partitions_index.append([name, 0])
            # Add partition to alignment list
            try:
                self.partitions_alignments[name].append(file_name if file_name
                                                        else name)
            except KeyError:
                self.partitions_alignments[name] = [file_name if file_name else
                                                    name]
            # Create empty model attribute for a single partition
            self.models[name] = [[[]], [None], []]

            self.partitions[name] = [(self.counter,
                                      self.counter + (length - 1)), codon]
            self.counter += length
            self.partition_length += length

        # When a list/tuple range is provided
        elif locus_range:

            if use_counter:
                locus_range = (self.counter,
                               self.counter + locus_range[1] - locus_range[0])

            # Add to or update alignments_range attribute. This will store the
            # original range of the alignment
            if file_name:
                if file_name in self.alignments_range:
                    if locus_range[0] < self.alignments_range[file_name][0]:
                        self.alignments_range[file_name][0] = locus_range[0]
                    if locus_range[1] > self.alignments_range[file_name][1]:
                        self.alignments_range[file_name][1] = locus_range[1]
                else:
                    self.alignments_range[file_name] = list(locus_range)

            # If the maximum range of the current partition is already included
            # in some other partition, and no codon partitions were provided
            # using the "codon" argument, then it should be an undefined codon
            # partition and should be added to an existing partition
            if locus_range[1] <= self.counter and not codon:

                # Find the parent partition
                parent_partition = self._find_parent(locus_range[1])

                if not parent_partition:
                    raise InvalidPartitionFile(
                        "Could not find parent partition of {}. Check the"
                        " ranges of your partitions to ensure no range "
                        "overlaps".format(name))

                # If no codon partition is present in the parent partition,
                # create one
                if not self.partitions[parent_partition][1]:
                    # Add partition to index list
                    self.partitions_index.append([parent_partition, 1])
                    # Create empty model attribute for two partitions
                    self.models[parent_partition] = [[[], []], [None, None], []]

                    parent_start = self.partitions[parent_partition][0][0]
                    self.partitions[parent_partition][1] = [parent_start,
                                                            locus_range[0]]
                else:
                    # Create empty model attribute for additional partitions
                    self.models[parent_partition][0].append([])
                    self.models[parent_partition][1].append(None)

                    # Add partition to index list
                    self.partitions_index.append([parent_partition, 2])

                    self.partitions[parent_partition][1].append(locus_range[0])

            # If the start of the current partition is already within the range
            # of a previous partitions, raise an exception
            elif locus_range[0] < self.counter:
                raise InvalidPartitionFile(
                    "Badly formatted partition with range [{}-{}] starts "
                    "inside the range of a previous partitions ({})".format(
                        locus_range[0], locus_range[1], self.counter))

            # Else, create the new partition. If codon is provided, the codon
            # information is automatically added
            else:
                if model_cls:
                    self.models[name] = model_cls
                else:
                    # Create empty model attribute for a single partition
                    self.models[name] = [[[]], [None], []]
                if codon:
                    self.partitions_index = [[name, x] for x in codon]
                else:
                    # Add partition to index list
                    self.partitions_index.append([name, 0])
                try:
                    self.partitions_alignments[name].append(file_name if
                                                            file_name else name)
                except KeyError:
                    self.partitions_alignments[name] = [file_name if file_name
                                                        else name]

                self.partitions[name] = [(locus_range[0], locus_range[1]),
                                         codon]

                self.counter = locus_range[1] + 1
                self.partition_length = locus_range[1] + 1

[docs]    def remove_partition(self, partition_name=None, file_name=None):
        """Removes partitions.

        Removes a partitions by a given partition or file name. This will
        handle any necessary changes on the remaining partitions. The changes
        will be straightforward for most attributes, such as partitions_index,
        partitions_alignments and models, but it will require a re-structuring
        of partitions because the ranges of the subsequent partitions will
        have to be adjusted.

        Parameters
        ----------
        partition_name : str
            Name of the partition.
        file_name : str
            Name of the alignment file.
        """

        def rm_part(nm):
            """
            Remove a partition from self.partitions and update the ranges of
            the remaining partitions
            """

            del self.partitions[nm]

            new_dic = OrderedDict()

            counter = 0
            for nm, vals in self.partitions.items():
                # Check if the starting position of the next partition is the
                # same as the counter. If so, add the vals to the new dict.
                # Else, correct the ranges based on the counter
                if vals[0][0] == counter:
                    new_dic[nm] = vals
                    counter = vals[0][1] + 1
                else:
                    # Get lenght of the partition
                    part_len = vals[0][1] - vals[0][0]
                    # Create corrected range
                    part_range = (counter, counter + part_len)
                    # Correct codon position start if any
                    if vals[1]:
                        codon = [counter, counter + 1, counter + 2]
                    else:
                        codon = False
                    new_dic[nm] = [part_range, codon]
                    counter = counter + part_len + 1

            return new_dic

        def remove_routine(part_name):
            """
            Routine that removes a partition based on its name. It ca be used
            when calling the remove_partition method with the partition_name
            argument, or with the file_name argument when the partition only
            contains that file name
            """
            # Remove partition from partition_index
            self.partitions_index = [x for x in self.partitions_index if x[0] !=
                                     part_name]

            # Remove partitions_alignments
            del self.partitions_alignments[part_name]

            # Remove models
            del self.models[part_name]

            # Remove from partitions
            self.partitions = rm_part(part_name)

        if partition_name:
            # Raise exception if partition name does not exist
            if partition_name not in self.partitions:
                raise PartitionException("%s is not a partition name" %
                                         partition_name)

            remove_routine(partition_name)

        if file_name:

            # Set file_found to True, when there is a match. If no match is
            # found, raise a PartitionException at the end of the loop.
            file_found = False
            for part, file_list in self.partitions_alignments.items():
                if file_name in file_list:
                    file_found = True
                    # If the partitions consists only of the provided file,
                    # Remove the entire partition
                    if len(file_list) == 1:
                        remove_routine(part)
                    # If the partition contains other files, then only remove
                    # the current file from the partition
                    else:
                        self.partitions_alignments[part].remove(file_name)

            if not file_found:
                raise PartitionException("%s file does not belong to any"
                                         "partition" % file_name)

[docs]    def change_name(self, old_name, new_name):
        """Changes name of a partition.

        Parameters
        ----------
        old_name : str
            Original partition name.
        new_name : str
            New partition name.
        """

        self.partitions[new_name] = self.partitions.pop(old_name)
        self.partitions_alignments[new_name] = \
            self.partitions_alignments.pop(old_name)
        self.models[new_name] = self.models.pop(old_name)

[docs]    def merge_partitions(self, partition_list, name):
        """Merges multiple partitions into a single one.

        Parameters
        ----------
        partition_list : list
            List with partition names to be merged.
        name : str
            Name of new partition
        """

        def merger(ranges):
            """
            Generator that merges ranges in a list of tuples. For example,
            if ranges is [(1, 234), (235, 456), (560, 607), (607,789)]
            this generator will yield [(1, 456), (560, 789)]
            """
            previous = 0
            last_start = 0
            for st, en in ranges:
                if not previous:
                    last_start = st
                    previous = en
                elif st - 1 == previous:
                    previous = en
                else:
                    yield last_start, previous
                    previous = en
                    last_start = st

            yield last_start, en

        def flatter(s):
            """
            Creates a flat iterator of tuples. If s is [[(1,2), (2,3)], (4,5)]
            this will yield ((1,2), (2,3), (4,5))
            """
            for i in s:
                if isinstance(i, tuple):
                    yield i
                else:
                    for j in i:
                        yield j

        # Get new range
        new_range = [x for x in merger(flatter((y[0] for x, y in
                                               self.partitions.items()
                                               if x in partition_list)))]

        # Add entries for new partition
        self.partitions[name] = [new_range[0] if len(new_range) == 1 else
            new_range, False]
        self.partitions_alignments[name] = list(set([i for x, y in
                                            self.partitions_alignments.items()
                                            if x in partition_list for i in y]))
        self.models[name] = [[[]], [None], []]

        # Delete previous partitions and update merged dict
        for p in partition_list:
            if len(self.partitions_alignments[p]) == 1:
                self.merged_files[self.partitions_alignments[p][0]] = \
                    self.partitions[p][0]
            del self.partitions[p]
            del self.partitions_alignments[p]
            del self.models[p]

[docs]    def split_partition(self, name, new_range=None, new_names=None):
        """Splits one partition into two.

        Splits a partitions with `name` into two with the tuple list provided
        by `new_range`. If new_range is None, This will split the partition
        by its alignment files instead.

        Parameters
        ----------
        name : str
            Name of the partition to be split.
        new_range : list or tuple, optional
            List of two tuples, containing the ranges of the new partitions.
        new_names : list, optional
            The names of the new partitions.
        """

        if new_range:

            # Add new partitions
            for n, r in zip(new_names, new_range):
                self.partitions[n] = [r, False]
                # Create new partitions_alignments. Keep the original alignment
                # file for both
                self.partitions_alignments[n] = self.partitions_alignments[name]
                self.models[n] = [[[]], [None], []]

        else:

            for aln in self.partitions_alignments[name]:
                #  Get original range of alignment file
                new_range = self.merged_files[aln]
                # Add new partitions
                aln_name = basename(aln)
                self.partitions[aln_name] = [new_range, False]
                self.partitions_alignments[aln_name] = [aln]
                self.models[aln_name] = [[[]], [None], []]

        # Delete original partition
        del self.partitions[name]
        del self.partitions_alignments[name]
        del self.models[name]

    # ==========================================================================
    # Model handling
    # ==========================================================================

[docs]    def parse_nexus_model(self, string):
        """Parses a substitution model defined in a prset and/or lset command.

        Parameters
        ----------
        string : str
            String with the prset or lset command.
        """

        string = string.lower()

        # Find out which partitions the current parameters apply to. If
        # detected, it should be something like "applyto=(1,2)"
        applyto = re.findall(r"applyto=\(.*\)", string)
        # Find parameters
        nst = re.findall(r"nst=[0-9]", string)
        statefreqpr = re.findall(r"statefreqpr=.*\)", string)

        # Collect params
        params = [x[0] for x in [nst, statefreqpr] if x]

        if applyto:
            if applyto == ["applyto=(all)"]:
                for partition in self.partitions:
                    self.models[partition][0] += params
            else:
                # Get target partitions
                part_index = [int(x) for x in
                              re.split("[()]", applyto[0])[1].split(",")]
                for i in part_index:
                    part = self.partitions_index[i - 1]
                    # Get partition name
                    part_name = part[0]
                    # Get subpartition index. 0 if single partition, other if
                    # multiple subpartition
                    part_subpart = part[1]
                    self.models[part_name][0][part_subpart] += params

[docs]    def get_model_name(self, params):
        """Given a list of parameters, return the name of the model

        Parameters
        ----------
        p : list
            List of prset/lset parameters

        Returns
        -------
        model : str or None
            Returns the name of the model if it finds. Else, returns None.
        """

        for model, p in self._models["mrbayes"].items():
            if params == p:
                return model
        else:
            return None

[docs]    def set_model(self, partition, models, links=None, apply_all=False):
        """Sets substitution model for a given partition.

        Parameters
        ----------
        partition : str
            Partition name.
        models : list
            Model names for each of the three codon partitions. If there
            are no codon partitions, provide only a single element to the list.
        links : list
            Provide potential links between codon models. For
            example, if codon 1 and 2 are to be linked, it should be:
            links=["12", "3"]
        apply_all : bool
            If True, the current model will be applied to all partitions.
        """

        # Get list with partitions to be changed
        if apply_all:
            plist = [x for x in self.partitions]
        else:
            plist = [partition]

        # Replace "No model" string with None
        models = [None if x == "No model" else x for x in models]

        # Set model to the whole partition
        if len(models) == 1:
            # If the current partition was previously defined as having codon
            # partitions, revert it
            for p in plist:
                if self.partitions[p][1]:
                    self.partitions[p][1] = False
                self.models[p][1] = models

        # Set codon models
        else:
            for p in plist:
                # Change the partition in self.partitions to have codon
                # partitions
                st_idx = self.partitions[p][0][0]
                self.partitions[p][1] = [st_idx + x for x in range(3)]
                self.models[p][1] = models
                self.models[p][2] = links

[docs]    def write_to_file(self, output_format, output_file, model="LG"):
        """Writes partitions to a file.

        Writes the Partitions object into an output file according to the
        output_format. The supported output formats are RAxML and Nexus. The
        `model` option is for the RAxML format only.

        Parameters
        ----------
        output_format : str
            Output format of partitions file. Can be either "nexus" or
            "raxml".
        output_file : str
            Path to output file.
        model : str
            Name of the model for the partitions. "raxml" format only.
        """

        if output_format == "raxml":
            outfile_handle = open(output_file + ".part.File", "w")
            for part, rge in self.partitions.items():
                partition_range = "-".join([str(x + 1) for x in rge[0]])
                outfile_handle.write("%s, %s = %s\n" % (model,
                                                        part,
                                                        partition_range))

            outfile_handle.close()

        elif output_format == "nexus":
            outfile_handle = open(output_file + ".charset", "w")
            for part, rge in self.partitions.items():
                outfile_handle.write("charset %s = %s;\n" % (
                                     part,
                                     "-".join([str(x + 1) for x in rge[0]])))

            outfile_handle.close()

        return 0


class Zorro(object):
    """
    Class that handles the concatenation of zorro weights.

    The weight file of each alignment is read when the object is created
    and its values, rounded to the nearest integer, are appended to
    `weigth_values` following the order of the alignments.

    Parameters
    ----------
    alignment_list : trifusion.process.sequence.AlignmentList
        AlignmentList object.
    suffix : str
        Suffix of the zorro weight files, based on the corresponding
        input alignments.
    zorro_dir : str
        Path to directory where zorro weight files are stored.
    """

    def __init__(self, alignment_list, suffix="_zorro.out", zorro_dir=None):

        # Concatenated integer weights of all alignments
        self.weigth_values = []
        self.suffix = suffix

        for file_path in [x.path for x in alignment_list.alignments.values()]:
            # If zorro_dir is provided, use the specified path
            if zorro_dir:
                zorro_file = splitext(file_path.split(sep)[-1])[0]
                zorro_file = "{}{}.txt".format(join(zorro_dir, zorro_file),
                                               suffix)
            # If zorro_dir is not provided, use the same path as the input
            # alignment
            else:
                # Only the file name is cut at its first dot. Dots in the
                # directory part of the path must not affect the location
                # of the zorro file
                aln_name = basename(file_path)
                aln_dir = file_path[:len(file_path) - len(aln_name)]
                zorro_file = aln_dir + aln_name.split(".")[0] + \
                    self.suffix + ".txt"

            # One weight per line. Blank lines (e.g. at the end of the
            # file) carry no weight and are skipped
            with open(zorro_file) as zorro_handle:
                self.weigth_values += [int(round(float(weigth.strip()))) for
                                       weigth in zorro_handle
                                       if weigth.strip()]

    def write_to_file(self, output_file):
        """ Creates a concatenated file with the zorro weights for the
        corresponding alignment files.

        Parameters
        ----------
        output_file : str
            Path to the output file, to which the "_zorro.out" suffix is
            appended.
        """
        outfile = output_file + "_zorro.out"
        with open(outfile, "w") as outfile_handle:
            for weigth in self.weigth_values:
                outfile_handle.write("%s\n" % weigth)

# Module metadata: author of this module.
__author__ = "Diogo N. Silva"