_DIGITAL SPEECH COMPRESSION_
by Jutta Degener


Listing One

/* params.h -- common definitions for the speech processing listings. */

#define P_MAX   8   /* order p of LPC analysis, typically 8..14 */
#define WINDOW  160 /* window size for short-term processing    */

double levinson_durbin(double const * ac, double * ref, double * lpc);
double schur(double const * ac, double * ref);
void autocorrelation(int n, double const * x, int lag, double * ac);

Listing Two

/* LPC- and Reflection Coefficients
 * The next two functions calculate linear prediction coefficients
 * and/or the related reflection coefficients from the first P_MAX+1
 * values of the autocorrelation function.
 */
#include "params.h"     /* for P_MAX */

/* The Levinson-Durbin algorithm was invented by N. Levinson in 1947
 * and modified by J. Durbin in 1959.
 */
double                  /* returns minimum mean square error        */
levinson_durbin(
    double const * ac,  /*  in: [0...p] autocorrelation values      */
    double       * ref, /* out: [0...p-1] reflection coefficients   */
    double       * lpc) /*      [0...p-1] LPC coefficients          */
{
    int i, j;  double r, error = ac[0];

    if (ac[0] == 0) {
        for (i = 0; i < P_MAX; i++) ref[i] = 0;
        return 0;
    }

    for (i = 0; i < P_MAX; i++) {

        /* Sum up this iteration's reflection coefficient. */
        r = -ac[i + 1];
        for (j = 0; j < i; j++) r -= lpc[j] * ac[i - j];
        ref[i] = r /= error;    

        /* Update LPC coefficients and total error. */
        lpc[i] = r;
        for (j = 0; j < i / 2; j++) {
            double tmp      = lpc[j];
            lpc[j]          = r * lpc[i - 1 - j];
            lpc[i - 1 - j] += r * tmp;
        }
        if (i % 2) lpc[j] += lpc[j] * r;

        error *= 1 - r * r;
    }
    return error;
}

/* I. Schur's recursion from 1917 is related to the Levinson-Durbin method,
 * but faster on parallel architectures; where Levinson-Durbin would take time
 * proportional to p * log(p), Schur only requires time proportional to p. The
 * GSM coder uses an integer version of the Schur recursion.
 */
double                  /* returns the minimum mean square error    */
schur(
    double const * ac,  /*  in: [0...p] autocorrelation values      */
    double       * ref) /* out: [0...p-1] reflection coefficients   */
{
    int i, m;  double r, error = ac[0], G[2][P_MAX];

    if (ac[0] == 0.0) {
        for (i = 0; i < P_MAX; i++) ref[i] = 0;
        return 0;
    }

    /* Initialize the rows of the generator matrix G to ac[1...p]. */
    for (i = 0; i < P_MAX; i++) G[0][i] = G[1][i] = ac[i + 1];

    for (i = 0;;) {
        /* Calculate this iteration's reflection coefficient and error. */
        ref[i] = r = -G[1][0] / error;
        error += G[1][0] * r;

        if (++i >= P_MAX) return error;

        /* Update the generator matrix. Unlike Levinson-Durbin's summing of 
         * reflection coefficients, this loop could be executed in parallel
         * by p processors in constant time.
         */
        for (m = 0; m < P_MAX - i; m++) {
            G[1][m] = G[1][m + 1] + r * G[0][m];
            G[0][m] = G[1][m + 1] * r + G[0][m];
        }
    }
}

/* Compute the autocorrelation 
 *          ,--,
 *   ac(l) = >  x(i) * x(i-l)  for all i
 *          `--'
 * for lags l between 0 and lag-1, and x(i) == 0 for i < 0 or i >= n
 */
void autocorrelation(
    int   n, double const * x,  /*  in: [0...n-1] samples x         */
    int lag, double       * ac) /* out: [0...lag-1] autocorrelation */
{
    double d; int i;
    while (lag--) {
        for (i = lag, d = 0; i < n; i++) d += x[i] * x[i-lag];
        ac[lag] = d;
    }
}


Listing Three

/* Short-Term Linear Prediction
 * To show which parts of speech are picked up by short-term linear
 * prediction, this program replaces everything but the short-term
 * predictable parts of its input with a fixed periodic pulse. (You
 * may want to try other excitations.)  The result lacks pitch
 * information, but is still discernible.
 */
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>

#include "params.h"   /* See Listing One; #defines WINDOW and P_MAX */

/* Default period for a pulse that will feed the short-term processing. The 
 * length of the period is the inverse of the pitch of the program's 
 * "robot voice"; the smaller the period, the higher the voice.
 */
#define PERIOD  100          /* human speech: between 16 and 160    */

/* The short-term synthesis and analysis functions below filter and inverse-
 * filter their input according to reflection coefficients from Listing Two. 
 */
static void short_term_analysis(
    double const * ref, /*  in: [0...p-1] reflection coefficients   */
    int            n,   /*      # of samples                        */
    double const * in,  /*      [0...n-1] input samples             */
    double       * out) /* out: [0...n-1] short-term residual       */
{
    double sav, s, ui; int i;
    static double u[P_MAX];

    while (n--) {
        sav = s = *in++;
        for (i = 0; i < P_MAX; i++) {
            ui   = u[i];
            u[i] = sav;

            sav  =  ui + ref[i] * s;
            s    =  s  + ref[i] * ui;
        }
        *out++ = s;
    }
}

static void short_term_synthesis(
    double const * ref, /*  in: [0...p-1] reflection coefficients   */
    int            n,   /*      # of samples                        */
    double const * in,  /*      [0...n-1] residual input            */
    double       * out) /* out: [0...n-1] short-term signal         */
{
    double s; int i;
    static double u[P_MAX+1];

    while (n--) {
        s = *in++;
        for (i = P_MAX; i--;) {
            s     -= ref[i] * u[i];
            u[i+1] = ref[i] * s + u[i];
        }
        *out++ = u[0] = s;
    }
}

/* This fake long-term processing section implements the "robotic" voice:
 * it replaces the short-term residual by a fixed periodic pulse.
 */
static void long_term(double * d)
{
    int i; static int r;
    for (i = 0; i < WINDOW; i++) d[i] = 0;
    for (; r < WINDOW; r += PERIOD) d[r] = 10000.0;
    r -= WINDOW;
}

/* Read signed short PCM values from stdin, process them as double,
 * and write the result back as shorts.
 */
int main(int argc, char ** argv)
{
    short  s[WINDOW]; double d[WINDOW]; int i, n;
    double ac[P_MAX + 1], ref[P_MAX];

    while ((n = fread(s, sizeof(*s), WINDOW, stdin)) > 0) {

        for (i = 0; i < n; i++) d[i] = s[i];
        for (; i < WINDOW; i++) d[i] = 0;

        /* Split input into short-term predictable part and residual. */
        autocorrelation(WINDOW, d, P_MAX + 1, ac);
        schur(ac, ref);
        short_term_analysis(ref, WINDOW, d, d);

        /* Process that residual, and synthesize the speech again. */
        long_term(d);
        short_term_synthesis(ref, WINDOW, d, d);

        /* Convert back to short, and write. */
        for (i = 0; i < n; i++)
            s[i] = d[i] > SHRT_MAX ? SHRT_MAX
                 : d[i] < SHRT_MIN ? SHRT_MIN
                 : d[i];
        if (fwrite(s, sizeof(*s), n, stdout) != n) {
            fprintf(stderr, "%s: write failed\n", *argv);
            exit(1);
        }
        if (feof(stdin)) break;
    }
    if (ferror(stdin)) {
        fprintf(stderr,"%s: read failed\n", *argv); exit(1); }
    return 0;
}

Listing Four

/* Long-Term Prediction
 * Here's a replacement for the long_term() function of Listing Three:
 * A "voice" that is based on the two long-term prediction parameters,
 * the gain and the lag.  By transmitting very little information,
 * the final output can be made to sound much more natural.
 */
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>

#include "params.h"     /* see Listing One; #defines WINDOW         */

#define SUBWIN  40      /* LTP window size, WINDOW % SUBWIN == 0    */

/*  Compute       n
 *               ,--,
 *      cc(l) =   >  x(i) * y(i-l)
 *               `--'
 *                i=0
 *  for lags l from 0 to lag-1.
 */
static void crosscorrelation(
    int            n,        /*  in: # of sample values             */
    double const * x,        /*      [0...n-1] samples x            */
    double const * y,        /*      [-lag+1...n-1] samples y       */
    int            lag,      /*      maximum lag+1                  */
    double       * c)        /* out: [0...lag-1] cc values          */
{
    while (lag--) {
        int i; double d = 0;
        for (i = 0; i < n; i++)
            d += x[i] * y[i - lag];
        c[lag] = d;
    }
}

/* Calculate long-term prediction lag and gain. */
static void long_term_parameters(
    double const * d,        /*  in: [0.....SUBWIN-1] samples       */
    double const * prev,     /*      [-3*SUBWIN+1...0] past signal  */
    int          * lag_out,  /* out: LTP lag                        */
    double       * gain_out) /*      LTP gain                       */
{
    int      i, lag;
    double   cc[2 * SUBWIN], maxcc, energy;

    /* Find the maximum correlation with lags SUBWIN...3*SUBWIN-1
     * between this frame and the previous ones.
     */
    crosscorrelation(SUBWIN, d, prev - SUBWIN, 2 * SUBWIN, cc);
    maxcc = cc[lag = 0];

    for (i = 1; i < 2 * SUBWIN; i++)
        if (cc[i] > maxcc) maxcc = cc[lag = i];

    *lag_out = lag + SUBWIN;

    /* Calculate the gain from the maximum correlation and
     * the energy of the selected SUBWIN past samples.
     */
    autocorrelation(SUBWIN, prev - *lag_out, 1, &energy);
    *gain_out = energy ? maxcc / energy : 1.0;
}

/* The "reduce" function simulates the effect of quantizing,
 * encoding, transmitting, decoding, and inverse quantizing the
 * residual signal by losing most of its information. For this
 * experiment, we simply throw away everything but the first sample value.
 */
static void reduce(double *d) {
    int i;
    for (i = 1; i < SUBWIN; i++) d[i] = 0;
}

void long_term(double * d)
{
    static double     prev[3*SUBWIN];
    double            gain;
    int               n, i, lag;

    for (n = 0; n < WINDOW/SUBWIN; n++, d += SUBWIN) {  

        long_term_parameters(d, prev + 3*SUBWIN, &lag, &gain);

        /* Remove the long-term predictable parts. */
        for (i = 0; i < SUBWIN; i++)
            d[i] -= gain * prev[3 * SUBWIN - lag + i];

        /* Simulate encoding and transmission of the residual. */
        reduce(d);

        /* Estimate the signal from predicted parameters and residual. */
        for (i = 0; i < SUBWIN; i++)
            d[i] += gain * prev[3*SUBWIN - lag + i];

        /* Shift the SUBWIN new estimates into the past. */
        for (i = 0; i < 2*SUBWIN; i++) prev[i] = prev[i + SUBWIN];
        for (i = 0; i < SUBWIN; i++) prev[2*SUBWIN + i] = d[i];
    }
}