Saturday, 17 January 2015

DSP Tech Brief : How To Perform An FFT On A Real-Time Data Stream

In this example we are going to use the SigLib DSP Library functions to perform a Fast Fourier Transform on the Analog inputs on our laptop or desktop computer.

The analog I/O functionality is based on the code described in this blog entry :
How To Connect Digital Signal Processing Functions To An ADC And A DAC

We can use the free version of SigLib (no source code) from here : http://www.numerix-dsp.com/free/.
Before proceeding any further, please ensure :

  That SigLib is installed and configured correctly for your compiler. Full details are included in the SigLib User's Guide which is provided with the library and also available here : http://www.numerix-dsp.com/docs/.
  That GnuPlot and Gnuplot/C are both installed and working correctly : http://realgonegeek.blogspot.co.uk/search?q=gnuplot.

The code uses a double buffering technique so that the analog input code writes the data into a separate array to the one being processed with the FFT functions.

Here is the source code for the top level function (pa_fft.c) :

#include <stdio.h>
#include <math.h>
#include "analog_io.h"
#include <siglib.h>                                 /* SigLib DSP library */
#include <gnuplot_c.h>                              /* Gnuplot/C */

#if _MSC_VER                                        // If compiler not MSVC then include kbhit.h
#include <conio.h>
#else
#include "kbhit.h"
#endif

#define SAMPLE_RATE     44100

#define FFT_LENGTH      ((SLArrayIndex_t)1024)
#define LOG2_FFT_LENGTH ((SLArrayIndex_t)10)
#define HALF_FFT_LENGTH (FFT_LENGTH >> 1)
#define WINDOW_SIZE     FFT_LENGTH

/* Declare global variables and arrays */
SLData_t    *pRealData, *pImagData, *pWindowCoeffs, *pResults, *pFFTCoeffs;


volatile int Input_SamplesCount;
volatile int Input_Data_Valid_Flag;
volatile int Input_Data_Buffer;

SLData_t  *pInputData, *pProcessData;
SLData_t  Data1 [FFT_LENGTH], Data2 [FFT_LENGTH];

h_GPC_Plot  *h2DPlot;                           /* Plot object */

void analog_isr (void)
{
                            // Process channel 0 - Store data for FFT and output zero
  *(pInputData+Input_SamplesCount) = (double)adc_in0;
  dac_out0 = 0;
                            // Process channel 1 - just output zero
  dac_out1 = 0;

  Input_SamplesCount++;
  if (Input_SamplesCount == FFT_LENGTH)           // If we have filled the buffer, mark data as valid and swap buffers
  {
    Input_Data_Valid_Flag = 1;                    // FFT Data is now valid
    Input_SamplesCount = 0;                       // Reset input sampels count

    if (Input_Data_Buffer == 1)
    {
      pInputData = Data2;                         /* Input to array 1  */
      pProcessData = Data1;                       /* Perform FFT on array 2 */
      Input_Data_Buffer = 2;
    }
    else              // (Input_Data_Buffer == 2)
    {
      pInputData = Data1;                         /* Input to array 1  */
      pProcessData = Data2;                       /* Perform FFT on array 2 */
      Input_Data_Buffer = 1;
    }
  }
}

void process_FFT (void)

{

  SDA_Copy (pProcessData, pRealData, FFT_LENGTH); // Copy data for processing

  Input_Data_Valid_Flag = 0;                      // FFT Data has been copied for processing

                                                  /* Apply window to data */
  SDA_Window (pRealData,                          /* Pointer to source array */
              pRealData,                          /* Pointer to destination array */
              pWindowCoeffs,                      /* Pointer to window oefficients */
              WINDOW_SIZE);                       /* Window length */

                                                  /* Perform real FFT */
  SDA_Rfft (pRealData,                            /* Pointer to real array */
            pImagData,                            /* Pointer to imaginary array */
            pFFTCoeffs,                           /* Pointer to FFT coefficients */
            SIGLIB_NULL_ARRAY_INDEX_PTR,          /* Pointer to bit reverse address table - NOT USED */
            FFT_LENGTH,                           /* FFT length */
            LOG2_FFT_LENGTH);                     /* log2 FFT length */

  SDA_LogMagnitude (pRealData,                    /* Pointer to real source array */
                    pImagData,                    /* Pointer to imaginary source array */
                    pResults,                     /* Pointer to log magnitude destination array */
                    HALF_FFT_LENGTH);             /* Data dataset length */

  SDA_Offset (pResults, -138.0, pResults, HALF_FFT_LENGTH); // Offset graph results for 0 dB

  gpc_plot_2d (h2DPlot,                           /* Graph handle */
               pResults,                          /* Dataset */
               HALF_FFT_LENGTH,                   /* Dataset length */
               "FFT of ADC Input Data",           /* Dataset title */
               SIGLIB_ZERO,                       /* Minimum X value */
               (double)(HALF_FFT_LENGTH - 1),     /* Maximum X value */
               "lines",                           /* Graph type */
               "blue",                            /* Colour */
               GPC_NEW);                          /* New graph */

}

int main(void)
{
  int Error;

  Input_SamplesCount = 0;                         // Initialize flags
  Input_Data_Valid_Flag = 0;
  Input_Data_Buffer = 0;

  pInputData = Data1;                             /* Input to array 1  */
  pProcessData = Data2;                           /* Perform FFT on array 2 */

                                                  /* Allocate memory */
  pRealData = SUF_VectorArrayAllocate (FFT_LENGTH);
  pImagData = SUF_VectorArrayAllocate (FFT_LENGTH);
  pFFTCoeffs = SUF_FftCoefficientAllocate (FFT_LENGTH);
  pResults = SUF_VectorArrayAllocate (FFT_LENGTH);        /* RMS result array */
  pWindowCoeffs = SUF_VectorArrayAllocate (WINDOW_SIZE);  /* Window array */

  h2DPlot =                                       /* Initialize plot */
      gpc_init_2d ("Fast Fourier Transform",      /* Plot title */
                   "Frequency",                   /* X-Axis label */
                   "Magnitude",                   /* Y-Axis label */
                   120.0,                         /* Scaling mode */
                   GPC_NEGATIVE,                  /* Sign mode */
                   GPC_MULTIPLOT,                 /* Multiplot / fast plot mode */
                   GPC_KEY_DISABLE);               /* Legend / key mode */
  if (h2DPlot == NULL)
  {
      printf ("\nPlot creation failure.\n");
      exit (1);
  }

  SIF_Window (pWindowCoeffs,                      /* Pointer to window oefficient */
              SIGLIB_HANNING,                     /* Window type */
              SIGLIB_ZERO,                        /* Window coefficient */
              FFT_LENGTH);                        /* Window length */

                                                  /* Initialise FFT */
  SIF_Fft (pFFTCoeffs,                            /* Pointer to FFT coefficients */
           SIGLIB_NULL_ARRAY_INDEX_PTR,           /* Pointer to bit reverse address table - NOT USED */
           FFT_LENGTH);                           /* FFT length */

  Error = analog_open (SAMPLE_RATE, analog_isr);  // Open the analog interface
  if (Error == -1)
    return 1;

  printf("Hit ENTER to stop program.\n");
  while (!kbhit())                                // Wait until key hit
  {
    if (Input_Data_Valid_Flag == 1)               // If data is valid process the FFT
    {
      process_FFT ();
    }
  }

  Error = analog_close ();                        // Close the analog interface
  if (Error == -1)
    return 1;

  gpc_close (h2DPlot);

  SUF_MemoryFree (pRealData);                     /* Free memory */
  SUF_MemoryFree (pImagData);
  SUF_MemoryFree (pResults);
  SUF_MemoryFree (pWindowCoeffs);
  SUF_MemoryFree (pFFTCoeffs);

  return 0;
}

Notes :
  The global variables are decalared as volatile. This prevents the compiler optimizing away the code, which often happens when variables are used in interrupt service routines because the compiler has no way of knowing how the ISR is going to be called.
  GCC and some other compilers do not implement the kbhit () function so I have included the code from here : http://cboard.cprogramming.com/linux-programming/51531-faq-cached-input-mygetch.html#post357655.

Testing

Under Microsoft Visual Studio the code can be built using the following command (assuming you have already got a working PortAudio installation) :
cl pa_fft.c analog_io.c -W4 -DPA_USE_ASIO=1 -D "SIGLIB_STATIC_LIB=1" -D "_CRT_SECURE_NO_WARNINGS=1" portaudio_x86.lib siglib.lib gnuplot_c.lib nhl.lib

My laptop only supports a single microphone input to the ADC so to test the code I used an XMOS Multi-Function Audio (MFA) platform, shown in this image :



The XMOS xCORE processor on this board is more than capable of handling the signal processing code shown in this blog but for the purposes of the demonstration I am running the DSP code on the host.

If you run the code yourself you will see the the FFT output plotted in Gnuplot and should look something like this - if you apply a noisy sinewave to the input ;-) :


Stay tuned for a future blog post where I will implement this code and the IIR filter code on an XMOS xCORE :-)

How To Connect Digital Signal Processing Functions To An ADC And A DAC

In my earlier post I showed how to implement an Infinite Impulse Response (IIR) filter :
Let's Do Some More Digital Signal Processing - Infinite Impulse Response (IIR) Filters.

In this blog I will look at how to interface DSP functions (e.g. the SigLib DSP library functions) to an Analog to Digital Converter (ADC) or Digital to Analog Converter (DAC). For this blog I will use the IIR filter functions mentioned in the above blog entry.

The system configuration is shown in this diagram :



In order to handle the ADC/DAC data input and output (I/O) it is necessary to write an Interrupt Service Routine (ISR) that responds to the interrupt from the ADC/DAC.

Any computer with a soundcard can be used to demonstrate this however modern Operating Systems trap interrupts and use drivers to interface to the hardware. In order to demonstrate hardware interrupts it is necessary to write a virtual software interrupt generator using a call back function.
I am going to use the free, cross-platform open-source audio I/O library PortAudio to implement the I/O then I will call the callback function that we will use as our ISR.

On a typical embedded system it will not be necessary to use PortAudio, just hook your ISR function to the interrupt from your ADC/DAC and it should work in exactly the same way.

Here is the header file (analog_io.h.) Note that I have declared the input and output registers as global variables. I know this doesn't conform to any standards for structured programming however these are designed to represent the input and output registers of the ADC and DAC devices.

int analog_open (int SampleRate, void (*analog_isr) (void));
int analog_close (void);

volatile short adc_in0, adc_in1, dac_out0, dac_out1;

Here is the wrapper code (analog_io.c.) for simulating our ISR using PortAudio. Note the pointer to the ISR is passed to the function analog_open () along with the desired sample rate.


#include <stdio.h>
#include "portaudio.h"
#include "analog_io.h"

#define PA_SAMPLE_TYPE      paInt32
#define FRAMES_PER_BUFFER   (64)

void (*p_analog_isr) (void);

typedef int SAMPLE;
PaStream *stream;

static int paCallback( const void *inputBuffer, void *outputBuffer,
                       unsigned long framesPerBuffer,
                       const PaStreamCallbackTimeInfo* timeInfo,
                       PaStreamCallbackFlags statusFlags,
                       void *userData );

/* PortAudio callback */
static int paCallback( const void *inputBuffer, void *outputBuffer,
                       unsigned long framesPerBuffer,
                       const PaStreamCallbackTimeInfo* timeInfo,
                       PaStreamCallbackFlags statusFlags,
                       void *userData )
{
    SAMPLE *out = (SAMPLE*)outputBuffer;
    const SAMPLE *in = (const SAMPLE*)inputBuffer;
    unsigned int i;
    (void) timeInfo; /* Prevent unused variable warnings. */
    (void) statusFlags;
    (void) userData;

    if( inputBuffer == NULL )         // If input buffer empty then output silence
    {
        for( i=0; i<framesPerBuffer; i++ )
        {
            *out++ = 0;               /* Left */
            *out++ = 0;               /* Right */
        }
    }
    else                              // If input buffer NOT empty then process data
    {
        for( i=0; i<framesPerBuffer; i++ )
        {
          adc_in0 = (short)(*in++ >> 16);            /* Left */
          adc_in1 = (short)(*in++ >> 16);            /* Right */
          p_analog_isr ();
          *out++ = (int)(dac_out0 << 16);
          *out++ = (int)(dac_out1 << 16);
        }
    }
    return paContinue;
}


int analog_open (int SampleRate, void (*analog_isr) (void))
{
  p_analog_isr = analog_isr;

  PaStreamParameters inputParameters, outputParameters;
  PaError err;

  err = Pa_Initialize();
  if( err != paNoError ) goto error;

  inputParameters.device = Pa_GetDefaultInputDevice(); /* default input device */
  if (inputParameters.device == paNoDevice) {
    fprintf(stderr,"Error: No default input device.\n");
    goto error;
  }
  inputParameters.channelCount = 2;       /* stereo input */
  inputParameters.sampleFormat = PA_SAMPLE_TYPE;
  inputParameters.suggestedLatency = Pa_GetDeviceInfo( inputParameters.device )->defaultLowInputLatency;
  inputParameters.hostApiSpecificStreamInfo = NULL;

  outputParameters.device = Pa_GetDefaultOutputDevice(); /* default output device */
  if (outputParameters.device == paNoDevice) {
    fprintf(stderr,"Error: No default output device.\n");
    goto error;
  }
  outputParameters.channelCount = 2;       /* stereo output */
  outputParameters.sampleFormat = PA_SAMPLE_TYPE;
  outputParameters.suggestedLatency = Pa_GetDeviceInfo( outputParameters.device )->defaultLowOutputLatency;
  outputParameters.hostApiSpecificStreamInfo = NULL;

  err = Pa_OpenStream(&stream,
                      &inputParameters,
                      &outputParameters,
                      SampleRate,
                      FRAMES_PER_BUFFER,
                      0, /* paClipOff, */  /* we won't output out of range samples so don't bother clipping them */
                      paCallback,
                      NULL );
  if( err != paNoError ) goto error;

  err = Pa_StartStream( stream );
  if( err != paNoError ) goto error;

  return 0;

error:
  Pa_Terminate();
  fprintf( stderr, "An error occured while using the portaudio stream\n" );
  fprintf( stderr, "Error number: %d\n", err );
  fprintf( stderr, "Error message: %s\n", Pa_GetErrorText( err ) );
  return -1;
}

int analog_close (void)

{
  int err;

  err = Pa_CloseStream( stream );
  if( err != paNoError ) goto error;

  Pa_Terminate();
  return 0;

error:
    Pa_Terminate();
    fprintf( stderr, "An error occured while using the portaudio stream\n" );
    fprintf( stderr, "Error number: %d\n", err );
    fprintf( stderr, "Error message: %s\n", Pa_GetErrorText( err ) );
  return -1;
}

Here is the header file (iir.h.) for the IIR filtering functions :

void iir_init (double *pState,
  const int NumberOfStages);

double iir_filter (const double Source,
  double *pState,
  const double *pCoeffs,
  const int NumberOfStages);

And here are the IIR filtering functions (taken from the blog mentioned earlier)  (iir.c.) :

#include "iir.h"

void iir_init (double *pState,
  const int NumberOfStages)

{
  int  i;

      /* Initialise the filter state array to 0 */
  for (i = 0; i < (NumberOfStages * 2); i++)
  {
    *pState++ = 0.0;
  }

}   /* End of iir_init () */


double iir_filter (const double Source,
  double *pState,
  const double *pCoeffs,
  const int NumberOfStages)

{
  register double       FeedbackSumOfProducts;
  register int i;
  register double       TempInputData;

  TempInputData = Source;

  for (i = 0; i < NumberOfStages; i++)
  {
    FeedbackSumOfProducts = TempInputData - (*(pCoeffs+3) * *pState) -
        (*(pCoeffs+4) * *(pState+1));       /* Feedback */
    TempInputData = (*pCoeffs * FeedbackSumOfProducts) +
        (*(pCoeffs + 1) * *pState) +
        (*(pCoeffs + 2) * *(pState+1));     /* Feedforward and save result for next time round */
    
    *(pState+1) = *pState;                  /* Move delayed samples */
    *pState = FeedbackSumOfProducts;

    pState += 2;        /* Increment array pointers */
    pCoeffs += 5;
  }

  return (TempInputData);                     /* Save output */

}   /* End of iir_filter () */

Now all we need is the top level program (pa_filter.c) which includes the filter coefficients (generated using Digital Filter Plus http://www.numerix-dsp.com/dfplus/), the main () function to initialize the filters and the analog interface and we need the ISR to call the DSP functions when the interrupt occurs.

#include <stdio.h>
#include <math.h>
#include "analog_io.h"
#include "iir.h"

#define SAMPLE_RATE     44100
#define FILTER_STAGES   1           /* Single 2nd ordrer biquad */

// Fc = 1 kHz LPF
double LPFCoefficientArray [] =
{
3.43448050764769180000e-003, 6.86896101529538370000e-003, 3.43448050764769180000e-003,
-1.89857508921973460000e+000, 9.12313011250325380000e-001
};

// Fc = 1 kHz HPF
double HPFCoefficientArray [] =
{
9.32663817644617140000e-001, -1.86532763528923430000e+000, 9.32663817644617140000e-001,
-1.85191320077338670000e+000, 8.78746746701696060000e-001
};

double Chan0FilterState [2 * FILTER_STAGES], Chan1FilterState [2 * FILTER_STAGES];


void analog_isr (void)
{
  double analog_sample;

                            // Process channel 0
  analog_sample = (double)adc_in0;
  analog_sample = iir_filter (analog_sample, Chan0FilterState, LPFCoefficientArray, FILTER_STAGES);
  dac_out0 = (short)(analog_sample);

                            // Process channel 1
  analog_sample = (double)adc_in1;
  analog_sample = iir_filter (analog_sample, Chan1FilterState, HPFCoefficientArray, FILTER_STAGES);
  dac_out1 = (short)(analog_sample);

}

int main(void)
{
  int Error;

  iir_init (Chan0FilterState, FILTER_STAGES);
  iir_init (Chan1FilterState, FILTER_STAGES);

  Error = analog_open (SAMPLE_RATE, analog_isr);  // Open the analog interface
  if (Error == -1)
    return 1;

  printf("Hit ENTER to stop program.\n");
  getchar();

  Error = analog_close ();                        // Close the analog interface
  if (Error == -1)
    return 1;

  return 0;
}

Testing

Under Microsoft Visual Studio the code can be built using the following command (assuming you have already got a working PortAudio installation) :
cl pa_filter.c analog_io.c iir.c -DPA_USE_ASIO=1 portaudio_x86.lib

My laptop only supports a single microphone input to the ADC so to test the code I used an XMOS Multi-Function Audio (MFA) platform, shown in this image :



The XMOS xCORE processor on this board is more than capable of handling the signal processing code shown in this blog but for the purposes of the demonstration I am running the DSP code on the host.

If you run the code yourself you will hear the difference between the two channels after applying the Low-pass and High-pass filters.

Let's Do Some More Digital Signal Processing - Infinite Impulse Response (IIR) Filters


Following on from my earlier blog : Let's Do Some Digital Signal Processing (Part 2) - One Pole Filter I thought it was about time that I took a look at Infinite Impulse Response (IIR) filters.

IIR filters give much higher performance than Finite Impulse Response filters because they use feedback as well as feedforward in order to implement the filter response.

This code shows how to implement an IIR biquad filter structure.

#include <stdio.h>

#define COUNT 8  /* Number of samples to process */

#define FILTER_STAGES   1           /* Single 2nd ordrer biquad */

// Fs = 44.1 kHz, Fc = 1 kHz LPF
double LPFCoefficientArray [] =
{
3.43448050764769180000e-003, 6.86896101529538370000e-003, 3.43448050764769180000e-003,
-1.89857508921973460000e+000, 9.12313011250325380000e-001
};

float impulseSrc[] = {
 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
 };
float stepSrc[] = {
 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0
 };
float Dst[] = {
 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
 };

double FilterState [2 * FILTER_STAGES];

void iir_init (double *pState,
  const int NumberOfStages);

double iir_filter (const double Source,
  double *pState,
  const double *pCoeffs,
  const int NumberOfStages);

void main (void)

{
  int i;

  iir_init (FilterState, FILTER_STAGES);      /* Initialize the filter */
  for (i = 0; i < COUNT; i++)
    Dst[i] = iir_filter (impulseSrc[i], FilterState, LPFCoefficientArray, FILTER_STAGES);
  printf ("One Pole Impulse = %f, %f, %f, %f, %f, %f, %f, %f\n", Dst[0], Dst[1], Dst[2], Dst[3], Dst[4], Dst[5], Dst[6], Dst[7]);

  iir_init (FilterState, FILTER_STAGES);      /* Initialize the filter */
  for (i = 0; i < COUNT; i++)
    Dst[i] = iir_filter (stepSrc[i], FilterState, LPFCoefficientArray, FILTER_STAGES);
  printf ("One Pole Step = %f, %f, %f, %f, %f, %f, %f, %f\n", Dst[0], Dst[1], Dst[2], Dst[3], Dst[4], Dst[5], Dst[6], Dst[7]);

}


void iir_init (double *pState,
  const int NumberOfStages)

{
  int  i;

      /* Initialise the filter state array to 0 */
  for (i = 0; i < (NumberOfStages * 2); i++)
  {
    *pState++ = 0.0;
  }

}   /* End of iir_init () */


double iir_filter (const double Source,
  double *pState,
  const double *pCoeffs,
  const int NumberOfStages)

{
  register double       FeedbackSumOfProducts;
  register int i;
  register double       TempInputData;

  TempInputData = Source;

  for (i = 0; i < NumberOfStages; i++)
  {
    FeedbackSumOfProducts = TempInputData - (*(pCoeffs+3) * *pState) -
        (*(pCoeffs+4) * *(pState+1));       /* Feedback */
    TempInputData = (*pCoeffs * FeedbackSumOfProducts) +
        (*(pCoeffs + 1) * *pState) +
        (*(pCoeffs + 2) * *(pState+1));     /* Feedforward and save result for next time round */
    
    *(pState+1) = *pState;                  /* Move delayed samples */
    *pState = FeedbackSumOfProducts;

    pState += 2;        /* Increment array pointers */
    pCoeffs += 5;
  }

  return (TempInputData);                     /* Save output */

}   /* End of iir_filter () */


The result of running this code is :
One Pole Impulse = 0.003434, 0.013390, 0.025722, 0.036620, 0.046059, 0.054038, 0.060575, 0.065706
One Pole Step = 0.003434, 0.016824, 0.042546, 0.079167, 0.125226, 0.179264, 0.239839, 0.305545

The filter implemented in this example is a low-pass filter with 1 kHz cut-off frequency and a sampling rate of 44.1 kHz (CD frequency). This can be observed by modifying the above code to generate a longer impulse response (e.g. 1024) and then passing this data to a Fast Fourier Transform (FFT).


Wednesday, 7 January 2015

Timing C Code Running On the XMOS xCORE Architecture

I recently had to show a customer how long a piece of code took to execute on the XMOS xCORE.

The nice thing about the xCORE is that it includes hardware timers that can be directly accessed from the application.

In order to be able to benchmark the code I knew there would be a small overhead to call the timers from the top level C code so my first task was to calculate this.

Having calculated the overhead I was now free to benchmark the actual code then remove the overhead to print the correct time.

As an aside, one of the great benefits of the XMOS architecture is how printf's are implemented. Essentially the output of a printf can be routed over the inter-processor communications link called xCONNECT, in real-time. This allows printf's to be located directly inside real-time code, unlike other microcontrollers where locating a printf inside an interrupt service routine is guaranteed to cause a run-time exception (AKA crash).

Here is the benchmarking code showing the performance of a simple piece of C code (or assembly) :

#include <stdio.h>

int main (void) 
{
  timer t;
  int start_time, end_time, overhead_time;

  t :> start_time;
  t :> end_time;
  overhead_time = end_time - start_time;

  t :> start_time;
  asm ("nop");
  asm ("nop");
  t :> end_time;
  
  printf ("Elapsed time = %d cycles\n", end_time - start_time - overhead_time);

  return (0);
}

And here is the output :

Elapsed time = 2 cycles

Phew, two nops takes two cycles :-)

Notes
XMOS use the incredible LLVM compiler, which I am sure will one day take over from GCC. It is almost never necessary to revert to assembly code but if you need to then you can see from the above example just how easy it is to use.
Maybe a future blog will cover how to read and write hardware (the timer, as shown above) but the instructions are essentially simple extensions to C (called XC) that are documented in the XMOS Programming Guide : https://www.xmos.com/download/public/XMOS-Programming-Guide-(documentation)(E).pdf.

C Code To Implement The Q Format Number System

I recently posted an "Introduction To The Q Format Number System" : http://realgonegeek.blogspot.co.uk/2015/01/introduction-to-q-format-number-system.html.

This afternoon I was driving back from a meeting and thought I should put some code together to demonstrate the operations. Here it is (you will notice that I have used a somewhat more useful word length than 8 bits for this piece of code) :


#include <stdio.h>

double QFormatIntegerToFloat (const int x,
  const int n)

{
  return (((double)x) / ((double)(1 << n)));
}

int FloatToQFormatInteger (const double x,
  const int m,
  const int n)

{
  int   i;
  int   y;
  int   mask = 0;

  y = (int)(x * ((double)(1 << n)));  // Generate the integer number in m.n format

  for (i = 0; i < (m+n); i++)         // Calculate the mask to ensure we return the correct number of valid bits
  {
    mask = (mask << 1) + 1;
  }

  return (y & mask);
}

int MpyQFormatInteger (const int x,
  const int y,
  const int n)

{
  long long tmp;
  tmp = ((long long)x * (long long)y) >> n;
  return ((int)tmp);
}


int main() 
{
  int         a, b, c, d, e;


  printf ("%lf = 0x%x\n",  2.0 , FloatToQFormatInteger (2.0 , 8, 24));
  printf ("%lf = 0x%x\n",  1.5 , FloatToQFormatInteger (1.5 , 8, 24));
  printf ("%lf = 0x%x\n",  0.5 , FloatToQFormatInteger (0.5 , 8, 24));
  printf ("%lf = 0x%x\n",  0.25, FloatToQFormatInteger (0.25, 8, 24));
  printf ("%lf = 0x%x\n",  0.75, FloatToQFormatInteger (0.75, 8, 24));
  printf ("\n");

  a = FloatToQFormatInteger (0.75, 8, 24);
  b = FloatToQFormatInteger (2.0 , 8, 24);
  c = MpyQFormatInteger (a, b, 24);

  printf ("Result = 0x%x : %lf\n", c, QFormatIntegerToFloat (c , 24));

  d = FloatToQFormatInteger (0.5 , 8, 24);
  e = MpyQFormatInteger (c, d, 24);

  printf ("Result = 0x%x : %lf\n", e, QFormatIntegerToFloat (e , 24));

  return (0);
}

Here is the output :

2.000000 = 0x2000000
1.500000 = 0x1800000
0.500000 = 0x800000
0.250000 = 0x400000
0.750000 = 0xc00000

Result = 0x1800000 : 1.500000
Result = 0xc00000 : 0.750000

Sunday, 4 January 2015

New DSP Software Updates

Prompted by the release of wxWidgets v3.x I have used the holidays as an opportunity to update and test a load of my software libraries and utilities.
They can be downloaded from here :

SigLib (v8.53) DSP Library : http://www.numerix-dsp.com/siglib.html. Includes some new functions for Q format integer support and it has been tested on Mac OS X and Raspberry Pi.

The following example programs and libraries have also been updated and these can be downloaded from http://www.numerix-dsp.com/files/ :

    System Analyzer- V2.10 - A simple System Analyzer that uses wxWidgets V3.0.2 (A free portable GUI library), NGL, PortAudio (A free portable Audio I/O library) and can optionally use SigLib. This application includes the full GUI source code.

    Numerix Host Library - V3.00 - A library of host I/O routines, including full source code. Functions include console and File I/O, including .WAV files. These functions can be used on any OS (UNIX, Windows etc.) and also on any other embedded processor or DSP that supports console and file I/O via the debug environment. These functions can also be used with Gnuplot/C to run any of the SigLib example programs.

    Numerix Graphics Library - V2.60 - A simple library of graphics routines for the wxWidgets V3.0.x based GUI applications. Full source code is included.

Friday, 2 January 2015

DSP Tech Brief : Introduction To The Q Format Number System

One area of Digital Signal Processing that causes sleepless nights for Design Engineers is when they first encounter fixed point maths. Issues such as overflow and underflow,. that are automatically resolved when using floating point numbers, can add an extra layer of complexity when implementing fixed point algorithms.
Unfortunately in embedded system design it is typically not possible to avoid fixed point issues because floating point silicon requires many more transistors, particularly to implement the multiplier, and hence is more expensive and slower than equivalent fixed point implementations.
There are many different ways to represent numbers using integers including signed 2's Complement, Signed Offset Binary etc. but one of the most useful is the Q Format.
Q format numbers split the fixed point word into two components - the integer (m bits) and fractional (n bits) parts to give a number represented in the format m.n. This makes the word length of the integer equal to m+n bits.
The Q format allows the algorithm to be implemented using fixed point operations but requires the designer to manage the data both into and out of the algorithm.

A regular integer number creates overflow during the multiplication and addition operations.
Some processors provide a partial workaround for this by allowing optional use of fractional numbers for multiplication, in which case underflow occurs but this still doesn't avoid overflow with addition operations.

8 bit integer multiplication example

Treating fixed point numbers as integers means that we have the possibility of overflow.
The maximum 8 bit signed number is 127 (0x7f)
127 x 127 = 16129 which is a 16 bit number and has overflowed our 8 bit word length

8 bit fractional multiplication example

Treating fixed point numbers as fractions means that we have the possibility of underflow but not overflow.
The maximum 8 bit signed number is 127/128 = 0.9921875 (0x7f)
0.9921875 x 0.9921875 = 0.98443603515625 which has not overflowed our 8 bit word length but the lsbs will underflow to give us 126/128 = 0.984375.

8 bit (2.6) Q format example

We have 2 bits for the integer (m) and 6 bits for the fractional (n) components.
The value of 1.0 is represented by 0b01000000 (0x40)
If we now represent the following numbers in 2.6 Q format :
  2.0  - 0b10000000 (0x80)
  1.5  - 0b01100000 (0x60)
  0.5  - 0b00100000 (0x20)
  0.25 - 0b00010000 (0x10)
  0.75 - 0b00110000 (0x30)

We can now perform some simple maths :
  0.75 x 2 = 0x30 x 0x80 = 0x1800 (0b1100000000000)
Note the word growth, which means we need a longer word length for the partial result. We now need to scale this back to give us our 8 bit result (or we could keep this extended format if this is part of a multiply accumulate (MAC) instruction).
Scaling (right shifting) by the number of fractional bits (6) gives us 0b1100000 (0x60) or 1.5, which is the result we would expect.
We could now perform the same multiply operation with :
  1.5 x 0.5 = 0x60 x 0x20 = 0xC00 (0b110000000000)
And again, if we perform the required scaling by 6 bits we get 0b110000 (0x30) = 0.75
We can see from this example that it is possible to handle numbers with both integer and fractional components using the Q format.

FIR Filtering
  When implementing a regular FIR filter where |h(n)| <= 1.0 then the Q format number system allows for word growth in the accumulator.
  The worst case word growth allows 2^N MAC operations before overflow, where N is the number of guard bits in the accumulator word.
  Using Q format numbers allows m integer bits to be used for the guard bits hence in the above example we can implement 2^2 = 4 MAC operations and guarantee no overflow.
  In more realistic FIR filters where only the central coefficients tend towards the maximum it is possible to implement larger filters before experiencing overflow but these scenarios should always be tested thoroughly.

IIR Filtering
  Unlike FIR filters, where |h(n)| is typically <= 1.0, in IIR filters the coefficient magnitude can often be > 1.0. Here are the coefficients of a very simple 2nd order Low Pass Butterworth Filter :
  float CoefficientArray [] =
  {
    9.57767909330728970000e-002, 1.61076720320250940000e-001, 9.57767909330728970000e-002,
    -1.26453884117041130000e+000, 6.17169143356808060000e-001
  };
  Implementing a filter like this in a fixed point DSP is clearly much more easy when using Q format numbers than either integer or fractional fixed point numbers.

As an example of where you might use Q format numbers is in high quality (>= 24 bit) audio. Using a 32 bit word length allows several options for the values of m and n, for example :
  32 bit word using m.n = 8.24 gives
    8 integer bits for word growth and 24 fractional bits
  32 bit word using m.n = 5.27 gives
    5 integer bits for word growth and 27 fractional bits for fractional bit growth
In these examples it is common to use a 64 bit result / accumulator register then this partial result is converted back to 32 bit Q format for further processing stages.

Please see this blog entry for C code for implementing the Q format number system : http://realgonegeek.blogspot.co.uk/2015/01/c-code-to-implement-q-format-number.html.

Further fixed point references are available at :
    https://www.xmos.com/support/appnotes
    https://github.com/xcore?query=DSP
    http://www.numerix-dsp.com/appsnotes/

This topic is covered in much more detail on the University of Oxford DSP course : http://realgonegeek.blogspot.co.uk/2014/12/the-next-round-of-university-of-oxford.html.