from sklearn.preprocessing import StandardScaler
import lightgbm as lgb

def predict(X, output_filename):
    """Run the saved LightGBM model on X and write the predictions to a CSV file.

    The last element of every row of X is used as an index into the sequence
    read from the second line of ``./data/mtx``. X is standardized with a
    freshly fitted ``StandardScaler``, scored by the booster loaded from
    ``./codes/model.txt``, and each score is turned into a 0/1 label
    (scores <= 0.5 become 0, everything else 1).

    Args:
        X: Row-indexable collection of feature rows; the last element of each
            row must be usable as a string index (an integer position).
        output_filename: Name of the file to create inside ``./output/``.
            An empty value (``""`` or ``None``) selects the default
            ``pps.csv``.

    Returns:
        None. The result is the file ``./output/<filename>`` with the header
        ``Amino Acid,Position,Prediction`` followed by one line per row of X.
    """
    # Store all the positions before scaling replaces X with standardized values.
    pos = [row[-1] for row in X]

    # Scale the data.
    # NOTE(review): the scaler is fitted on the prediction input itself (and the
    # position column is scaled together with the other columns) -- confirm this
    # matches how the model was trained.
    scaler = StandardScaler()
    X = scaler.fit_transform(X)

    # Perform prediction and threshold the scores at 0.5.
    lgbm = lgb.Booster(model_file='./codes/model.txt')
    preds_proba = lgbm.predict(X)
    preds = [0 if score <= 0.5 else 1 for score in preds_proba]

    # Get the sequence: it is the second line of the file, so the first line is
    # read and discarded. The context manager closes the file even on error.
    with open("./data/mtx") as fp:
        fp.readline()
        seq = fp.readline().strip()

    # Fall back to the default name when no output filename was supplied.
    filename = output_filename if output_filename else "pps.csv"

    # Write out the predictions; the context manager guarantees the file is
    # flushed and closed even if a position is out of range for the sequence.
    with open("./output/" + filename, "w") as fp:
        fp.write("Amino Acid,Position,Prediction\n")
        for position, label in zip(pos, preds):
            fp.write(seq[position] + "," + str(position) + "," + str(label) + "\n")
