"""
Takes in a descriptor feature file and runs the saved tagg, tm and tmon models on it to produce predictions.
"""
import joblib
import pandas as pd
from pathlib import Path

# Column names that are filtered out of each model's feature list before
# prediction.
# NOTE(review): the third entry is spelled 'tmonset' while the model keys below
# use 'tmon' — confirm it matches the column name used in the tmon CSV.
label_field_to_exclude = ['tagg', 'tm', 'tmonset']

# Saved models and their feature files live in a "models" directory that sits
# next to this file.
current_dir = Path(__file__).parent
model_dir = current_dir.joinpath("models")
print(model_dir)

# Saved model file for each model name (insertion order: tagg, tm, tmon).
model_map = {
    target: model_dir / f"{target}/{pickle_name}"
    for target, pickle_name in (
        ("tagg", "efs_best_knn.pkl"),
        ("tm", "efs_best_randomforest.pkl"),
        ("tmon", "efs_best_elasticnet.pkl"),
    )
}

# data files that contain the features (one rf_efs.csv per model name)
data_files = {target: model_dir / f"{target}/rf_efs.csv" for target in model_map}


def build_model_feature_col_map(files=None):
    """Map each model name to the column names listed in its data file.

    Only the CSV header is parsed (``nrows=0``); the data rows are never
    loaded because nothing but the column names is needed here.

    Args:
        files: Optional mapping of model name -> CSV path. When omitted, the
            module-level ``data_files`` mapping is used.

    Returns:
        dict: model name -> list of column names in header order, with the
        first column of each file skipped.
    """
    if files is None:
        files = data_files
    return {
        # [1:] skips the first column of the file; it is never treated as a feature.
        model: pd.read_csv(path, nrows=0).columns.tolist()[1:]
        for model, path in files.items()
    }

# Column names read from each model's data file, keyed by model name.
model_feature_col_map = build_model_feature_col_map()
print(model_feature_col_map)

# Note: the holdout files live in the AbMelt directory and are for testing only.
# In production, descriptors come from the pipeline.
abmelt_data_dir = current_dir.parent.joinpath("AbMelt", "data")
holdout_files = {
    target: abmelt_data_dir / f"{target}/holdout.csv"
    for target in ("tagg", "tm", "tmon")
}

def infer_using_descriptors(model_name, descriptor_file, feature_names):
    """Run the saved model for ``model_name`` on a descriptor CSV.

    Args:
        model_name: Key into ``model_map`` selecting the saved model to load.
        descriptor_file: Path of the CSV that holds the descriptor columns.
        feature_names: Column labels to select from the CSV and hand to the
            model.

    Returns:
        Whatever the loaded model's ``predict`` returns for the selected
        columns.
    """
    # joblib.load unpickles the file, so model files must come from a trusted source.
    estimator = joblib.load(model_map[model_name])
    descriptors = pd.read_csv(descriptor_file)
    print(f"df shape: {descriptors.shape}, columns: {descriptors.columns}")
    return estimator.predict(descriptors[feature_names])

def main():
    """Predict on every holdout file and print the results.

    For each entry in ``holdout_files``, the model's column list is taken from
    ``model_feature_col_map``, any name in ``label_field_to_exclude`` is
    dropped, and the remaining columns are passed to
    ``infer_using_descriptors``. The model name, the feature names and the
    predictions are printed in that order.
    """
    for model_name, descriptor_file in holdout_files.items():
        candidate_columns = model_feature_col_map[model_name]
        feature_names = [
            column
            for column in candidate_columns
            if column not in label_field_to_exclude
        ]
        print(f"Model: {model_name}")
        print(f"Feature names: {feature_names}")
        print(infer_using_descriptors(model_name, descriptor_file, feature_names))

# Run the holdout predictions only when this file is executed as a script.
if __name__ == "__main__":
    main()