Source code for dRFEtools.metrics.ranking

"""
This script ranks features within the feature elimination loop.
Originally developed by Tarun Katipalli.
Edits and package management by Kynon Jade Benjamin
"""

# Module metadata: original developer of the ranking routine.
__author__ = "Tarun Katipalli"

import numpy as np
import pandas as pd
from os.path import join, exists

# Names exported by ``from <module> import *``.
__all__ = ["features_rank_fnc"]


def features_rank_fnc(features, rank, n_features_to_keep, fold, out_dir, RANK):
    """
    Rank features and append the results to a tab-delimited text file.

    The entries of ``rank`` past the first ``n_features_to_keep`` are treated
    as the features eliminated in this step. They receive consecutive ranks
    starting at ``n_features_to_keep + 1``. When nothing is eliminated, every
    feature referenced by ``rank`` is written with rank 1.

    Args:
        features: A vector of feature names.
        rank: A vector with feature ranks based on absolute value of feature
            importance. Its entries are used as positional indices into
            ``features``.
        n_features_to_keep (int): Number of features to keep. Python and
            NumPy integers are both accepted.
        fold (int): Current fold being analyzed. Python and NumPy integers
            are both accepted.
        out_dir (str): Output directory for the text file.
        RANK (bool): Whether to perform ranking and write results. When
            falsy the function returns immediately, before any validation.

    Returns:
        None

    Raises:
        ValueError: If ``n_features_to_keep`` or ``fold`` is not a
            non-negative integer, or if ``features`` and ``rank`` differ in
            length.

    Writes:
        Text file: ``rank_features.txt`` in ``out_dir``; ranked features by
        fold, tab-delimited, with columns ``Geneid``, ``Fold`` and ``Rank``.
        Rows are appended, and the header is written only when the file does
        not exist yet.
    """
    if not RANK:
        return

    # NumPy integer scalars (e.g. np.int64) are not instances of ``int``, but
    # they are routine in NumPy-driven loops, so accept them as well.
    integer_types = (int, np.integer)
    if not isinstance(n_features_to_keep, integer_types) or n_features_to_keep < 0:
        raise ValueError("n_features_to_keep must be a non-negative integer")
    if not isinstance(fold, integer_types) or fold < 0:
        raise ValueError("fold must be a non-negative integer")
    if len(features) != len(rank):
        raise ValueError("Length of features and rank must be the same")

    features = np.asarray(features)
    rank = np.asarray(rank)

    # Everything past the kept prefix is what this step eliminated.
    eliminated = rank[n_features_to_keep:]
    if eliminated.size == 0:
        # Nothing was eliminated: every feature shares the top rank.
        rank_df = pd.DataFrame({"Geneid": features[rank], "Fold": fold, "Rank": 1})
    else:
        first_rank = n_features_to_keep + 1
        rank_df = pd.DataFrame(
            {
                "Geneid": features[eliminated],
                "Fold": fold,
                "Rank": np.arange(first_rank, first_rank + len(eliminated)),
            }
        )

    output_file = join(out_dir, "rank_features.txt")
    # Append mode accumulates results across calls; emit the header only once.
    rank_df.sort_values("Rank", ascending=False).to_csv(
        output_file,
        sep="\t",
        mode="a",
        index=False,
        header=not exists(output_file),
    )