The Perl Toolchain Summit needs more sponsors. If your company depends on Perl, please support this very important event.
/*
 * 12 September 2001
 *
 * Score sequences according to a substitution matrix.
 * The functions here work on 
 *  * sequence sequence scores
 *  * sequence to profile scores
 *  * profile to profile scores
 *
 * $Id: score_smat.c,v 1.1 2007/09/28 16:57:10 mmundry Exp $
 */

#include <stdio.h>
#include <stdlib.h>

#include "amino_a.h"
#include "mprintf.h"
#include "read_seq_i.h"
#include "score_mat.h"
#include "score_smat.h"
#include "seq.h"
#include "seqprof.h"
#include "sub_mat.h"

/* ---------------- inner_score_2_seq -------------------------
 * We are given two sequences. Score them using the score
 * matrix from *smat.
 * We do not allocate any memory. It is up to the caller to
 * give us a matrix with enough space.
 * Note the convention that the score matrix is (m+2)(n+2) for
 * sequences of m and n.
 */
static int
inner_score_2_seq (float **scores, struct seq *s1,
                   struct seq *s2, const struct sub_mat *smat)
{
    size_t i, j;
    size_t n_rows = s1->length + 2;
    size_t n_cols = s2->length + 2;

    seq_std2thomas (s1);
    seq_std2thomas (s2);

    /* matrix is n_rows (s1), n_cols (s2) */
    for (i = 0; i < n_cols ; i++)
        scores[0][i] = 0;                   /* First row */
    for (i = 0; i < n_cols ; i++)
        scores[n_rows - 1][i] = 0;          /* Last row */
    for (i = 0; i < n_rows ; i++)
        scores [i][0] = 0;                  /* First column */
    for (i = 0; i < n_rows ; i++)
        scores [i][n_cols - 1 ] = 0;        /* Last column */

    for (i = 0; i < n_rows - 2; i++) {
        for (j = 0; j < n_cols - 2 ; j++) {
            int sc = s1->seq[i];
            int tc = s2->seq[j];
            scores [i+1][j+1] = smat->data[sc][tc];
        }
    }

    return EXIT_SUCCESS;
}


/* ---------------- score_smat  -------------------------------
 * Score two sequences using a substitution matrix.
 */
int
score_smat (struct score_mat *score_mat, struct seq *s1,
             struct seq *s2, const struct sub_mat *smat)
{
    const char *this_sub = "score_smat";
    extern const char *mismatch;

    if (( score_mat->n_rows != s1->length + 2) ||
        ( score_mat->n_cols != s2->length + 2)) {
        err_printf (this_sub, mismatch,
                    score_mat->n_rows - 2, score_mat->n_cols - 2,
                    s1->length, s2->length);
        goto bail_out;
    }
    if (inner_score_2_seq (score_mat->mat, s1, s2, smat) == EXIT_FAILURE)
        goto bail_out;

    return (EXIT_SUCCESS);
 bail_out:
    err_printf (this_sub, "Serious scoring error\n");
    return (EXIT_FAILURE);
}

/* ---------------- inner_score_prof_seq ----------------------
 * Given a profile and a sequence, score them according to a
 * substitution matrix.
 * We do not allocate any memory. It is up to the caller to
 * give us a matrix with enough space.
 * Note the convention that the score matrix is (m+2)(n+2) for
 * sequences of m and n.
 * Optimisation note: The inner loop below splits the multiplication
 * and summation into separate loops and uses a temporary array.
 * Reading the output from the intel compiler, it seems this simplifies
 * the loop structure and lets it optimise a bit more. Presumably, the
 * simpler loop structure is good for any compiler.
 */
static void
inner_score_prof_seq (float **scores, struct seqprof *sp,
                   struct seq *seq, const struct sub_mat *smat)
{
    size_t i, j;
    const size_t n_rows = sp->nres + 2;
    const size_t n_cols = seq->length + 2;

    seq_std2thomas (seq);

    /* matrix is n_rows (s1), n_cols (s2) */
    for (i = 0; i < n_cols ; i++)
        scores[0][i] = 0;                   /* First row */
    for (i = 0; i < n_cols ; i++)
        scores[n_rows - 1][i] = 0;          /* Last row */
    for (i = 0; i < n_rows ; i++)
        scores [i][0] = 0;                  /* First column */
    for (i = 0; i < n_rows ; i++)
        scores [i][n_cols - 1 ] = 0;        /* Last column */

    for (i = 0; i < n_rows - 2; i++) {                 /* profile */
        const size_t i_ndx = i + 1;
        for ( j = 0; j < n_cols - 2; j++) {            /* sequence */
            float tmpscore = 0;
            const int sc = seq->seq[j];
            float *tmp1 = sp->freq_mat[i];
            float tmparr [blst_afbet_size];
            size_t k, m;
            for ( k = 0; k < blst_afbet_size; k++)
                tmparr[k]= tmp1[k] * smat->data[sc][k];
            for ( m = 0; m < blst_afbet_size; m++)
                tmpscore += tmparr[m];
            scores [i_ndx][j+1] = tmpscore;
        }
    }
}


/* ---------------- inner_score_prof_prof ----------------------
 * Score a profile/profile pair.
 */
static void
inner_score_prof_prof (float **scores, const struct seqprof *sp1,
                   const struct seqprof *sp2, const struct sub_mat *smat)
{
    size_t i, j, k;
    const size_t n_rows = sp1->nres + 2;
    const size_t n_cols = sp2->nres + 2;

    /* matrix is n_rows (s1), n_cols (s2) */
    for (i = 0; i < n_cols ; i++)
        scores[0][i] = 0;                   /* First row */
    for (i = 0; i < n_cols ; i++)
        scores[n_rows - 1][i] = 0;          /* Last row */
    for (i = 0; i < n_rows ; i++)
        scores [i][0] = 0;                  /* First column */
    for (i = 0; i < n_rows ; i++)
        scores [i][n_cols - 1 ] = 0;        /* Last column */

    for (i = 0; i < n_rows - 2; i++) {                 /* profile */
        const size_t i_ndx = i + 1;
        for ( j = 0; j < n_cols - 2; j++) {            /* other profile */
            float tmpscore = 0;
            for ( k = 0; k < blst_afbet_size; k++) {
                size_t m, n;
                const float f1 = sp1->freq_mat[i][k];
                float *ftmp2 = sp2->freq_mat[j];
                float tarray[blst_afbet_size];
                for ( m = 0; m < blst_afbet_size; m++)
                    tarray [m] = f1 * ftmp2[m] * smat->data[k][m];
                for ( n = 0; n < blst_afbet_size; n++)
                    tmpscore += tarray[n];
            }
            scores [i_ndx][j+1] = tmpscore;
        }
    }
}


/* ---------------- score_sprof -------------------------------
 * Score a sequence and a sequence profile given a
 * substitutqion matrix.
 */
int
score_sprof (struct score_mat *score_mat, struct seqprof *sp,
             struct seq *seq, const struct sub_mat *smat)
{
    const char *this_sub = "score_sprof";
    extern const char *mismatch;

    if (( score_mat->n_rows != sp->nres + 2) ||
        ( score_mat->n_cols != seq->length + 2)) {
        err_printf (this_sub, mismatch,
                    score_mat->n_rows - 2, score_mat->n_cols - 2,
                    sp->nres, seq->length);
        return (EXIT_FAILURE);
    }
    inner_score_prof_seq (score_mat->mat, sp, seq, smat);
    return EXIT_SUCCESS;
}

/* ---------------- score_prof_prof ---------------------------
 * Score an profile/profile pair given a substitution matrix.
 */
int
score_prof_prof ( struct score_mat *score_mat, struct seqprof *sp1,
             struct seqprof *sp2, const struct sub_mat *smat)
{
    const char *this_sub = "score_prof_prof";
    extern const char *mismatch;

    if (( score_mat->n_rows != sp1->nres + 2) ||
        ( score_mat->n_cols != sp2->nres + 2)) {
        err_printf (this_sub, mismatch,
                    score_mat->n_rows - 2, score_mat->n_cols - 2,
                    sp1->nres, sp2->nres);
        return (EXIT_FAILURE);
    }
    inner_score_prof_prof (score_mat->mat, sp1, sp2, smat);
    return EXIT_SUCCESS;
}