Saturday 17 January 2015

DSP Tech Brief : How To Perform An FFT On A Real-Time Data Stream

In this example we are going to use the SigLib DSP Library functions to perform a Fast Fourier Transform on the Analog inputs on our laptop or desktop computer.

The analog I/O functionality is based on the code described in this blog entry :
How To Connect Digital Signal Processing Functions To An ADC And A DAC

We can use the free version of SigLib (no source code) from here : http://www.numerix-dsp.com/free/.
Before proceeding any further, please ensure :

  That SigLib is installed and configured correctly for your compiler. Full details are included in the SigLib User's Guide which is provided with the library and also available here : http://www.numerix-dsp.com/docs/.
  That GnuPlot and Gnuplot/C are both installed and working correctly : http://realgonegeek.blogspot.co.uk/search?q=gnuplot.

The code uses a double buffering technique so that the analog input code writes the data into a separate array to the one being processed with the FFT functions.

Here is the source code for the top level function (pa_fft.c) :

#include <stdio.h>
#include <math.h>
#include "analog_io.h"
#include <siglib.h>                                 /* SigLib DSP library */
#include <gnuplot_c.h>                              /* Gnuplot/C */

#if _MSC_VER                                        // If compiler not MSVC then include kbhit.h
#include <conio.h>
#else
#include "kbhit.h"
#endif

#define SAMPLE_RATE     44100

#define FFT_LENGTH      ((SLArrayIndex_t)1024)
#define LOG2_FFT_LENGTH ((SLArrayIndex_t)10)
#define HALF_FFT_LENGTH (FFT_LENGTH >> 1)
#define WINDOW_SIZE     FFT_LENGTH

/* Declare global variables and arrays */
SLData_t    *pRealData, *pImagData, *pWindowCoeffs, *pResults, *pFFTCoeffs;


volatile int Input_SamplesCount;
volatile int Input_Data_Valid_Flag;
volatile int Input_Data_Buffer;

SLData_t  *pInputData, *pProcessData;
SLData_t  Data1 [FFT_LENGTH], Data2 [FFT_LENGTH];

h_GPC_Plot  *h2DPlot;                           /* Plot object */

void analog_isr (void)
{
                            // Process channel 0 - Store data for FFT and output zero
  *(pInputData+Input_SamplesCount) = (double)adc_in0;
  dac_out0 = 0;
                            // Process channel 1 - just output zero
  dac_out1 = 0;

  Input_SamplesCount++;
  if (Input_SamplesCount == FFT_LENGTH)           // If we have filled the buffer, mark data as valid and swap buffers
  {
    Input_Data_Valid_Flag = 1;                    // FFT Data is now valid
    Input_SamplesCount = 0;                       // Reset input sampels count

    if (Input_Data_Buffer == 1)
    {
      pInputData = Data2;                         /* Input to array 1  */
      pProcessData = Data1;                       /* Perform FFT on array 2 */
      Input_Data_Buffer = 2;
    }
    else              // (Input_Data_Buffer == 2)
    {
      pInputData = Data1;                         /* Input to array 1  */
      pProcessData = Data2;                       /* Perform FFT on array 2 */
      Input_Data_Buffer = 1;
    }
  }
}

void process_FFT (void)

{

  SDA_Copy (pProcessData, pRealData, FFT_LENGTH); // Copy data for processing

  Input_Data_Valid_Flag = 0;                      // FFT Data has been copied for processing

                                                  /* Apply window to data */
  SDA_Window (pRealData,                          /* Pointer to source array */
              pRealData,                          /* Pointer to destination array */
              pWindowCoeffs,                      /* Pointer to window oefficients */
              WINDOW_SIZE);                       /* Window length */

                                                  /* Perform real FFT */
  SDA_Rfft (pRealData,                            /* Pointer to real array */
            pImagData,                            /* Pointer to imaginary array */
            pFFTCoeffs,                           /* Pointer to FFT coefficients */
            SIGLIB_NULL_ARRAY_INDEX_PTR,          /* Pointer to bit reverse address table - NOT USED */
            FFT_LENGTH,                           /* FFT length */
            LOG2_FFT_LENGTH);                     /* log2 FFT length */

  SDA_LogMagnitude (pRealData,                    /* Pointer to real source array */
                    pImagData,                    /* Pointer to imaginary source array */
                    pResults,                     /* Pointer to log magnitude destination array */
                    HALF_FFT_LENGTH);             /* Data dataset length */

  SDA_Offset (pResults, -138.0, pResults, HALF_FFT_LENGTH); // Offset graph results for 0 dB

  gpc_plot_2d (h2DPlot,                           /* Graph handle */
               pResults,                          /* Dataset */
               HALF_FFT_LENGTH,                   /* Dataset length */
               "FFT of ADC Input Data",           /* Dataset title */
               SIGLIB_ZERO,                       /* Minimum X value */
               (double)(HALF_FFT_LENGTH - 1),     /* Maximum X value */
               "lines",                           /* Graph type */
               "blue",                            /* Colour */
               GPC_NEW);                          /* New graph */

}

int main(void)
{
  int Error;

  Input_SamplesCount = 0;                         // Initialize flags
  Input_Data_Valid_Flag = 0;
  Input_Data_Buffer = 0;

  pInputData = Data1;                             /* Input to array 1  */
  pProcessData = Data2;                           /* Perform FFT on array 2 */

                                                  /* Allocate memory */
  pRealData = SUF_VectorArrayAllocate (FFT_LENGTH);
  pImagData = SUF_VectorArrayAllocate (FFT_LENGTH);
  pFFTCoeffs = SUF_FftCoefficientAllocate (FFT_LENGTH);
  pResults = SUF_VectorArrayAllocate (FFT_LENGTH);        /* RMS result array */
  pWindowCoeffs = SUF_VectorArrayAllocate (WINDOW_SIZE);  /* Window array */

  h2DPlot =                                       /* Initialize plot */
      gpc_init_2d ("Fast Fourier Transform",      /* Plot title */
                   "Frequency",                   /* X-Axis label */
                   "Magnitude",                   /* Y-Axis label */
                   120.0,                         /* Scaling mode */
                   GPC_NEGATIVE,                  /* Sign mode */
                   GPC_MULTIPLOT,                 /* Multiplot / fast plot mode */
                   GPC_KEY_DISABLE);               /* Legend / key mode */
  if (h2DPlot == NULL)
  {
      printf ("\nPlot creation failure.\n");
      exit (1);
  }

  SIF_Window (pWindowCoeffs,                      /* Pointer to window oefficient */
              SIGLIB_HANNING,                     /* Window type */
              SIGLIB_ZERO,                        /* Window coefficient */
              FFT_LENGTH);                        /* Window length */

                                                  /* Initialise FFT */
  SIF_Fft (pFFTCoeffs,                            /* Pointer to FFT coefficients */
           SIGLIB_NULL_ARRAY_INDEX_PTR,           /* Pointer to bit reverse address table - NOT USED */
           FFT_LENGTH);                           /* FFT length */

  Error = analog_open (SAMPLE_RATE, analog_isr);  // Open the analog interface
  if (Error == -1)
    return 1;

  printf("Hit ENTER to stop program.\n");
  while (!kbhit())                                // Wait until key hit
  {
    if (Input_Data_Valid_Flag == 1)               // If data is valid process the FFT
    {
      process_FFT ();
    }
  }

  Error = analog_close ();                        // Close the analog interface
  if (Error == -1)
    return 1;

  gpc_close (h2DPlot);

  SUF_MemoryFree (pRealData);                     /* Free memory */
  SUF_MemoryFree (pImagData);
  SUF_MemoryFree (pResults);
  SUF_MemoryFree (pWindowCoeffs);
  SUF_MemoryFree (pFFTCoeffs);

  return 0;
}

Notes :
  The global variables are decalared as volatile. This prevents the compiler optimizing away the code, which often happens when variables are used in interrupt service routines because the compiler has no way of knowing how the ISR is going to be called.
  GCC and some other compilers do not implement the kbhit () function so I have included the code from here : http://cboard.cprogramming.com/linux-programming/51531-faq-cached-input-mygetch.html#post357655.

Testing

Under Microsoft Visual Studio the code can be built using the following command (assuming you have already got a working PortAudio installation) :
cl pa_fft.c analog_io.c -W4 -DPA_USE_ASIO=1 -D "SIGLIB_STATIC_LIB=1" -D "_CRT_SECURE_NO_WARNINGS=1" portaudio_x86.lib siglib.lib gnuplot_c.lib nhl.lib

My laptop only supports a single microphone input to the ADC so to test the code I used an XMOS Multi-Function Audio (MFA) platform, shown in this image :



The XMOS xCORE processor on this board is more than capable of handling the signal processing code shown in this blog but for the purposes of the demonstration I am running the DSP code on the host.

If you run the code yourself you will see the the FFT output plotted in Gnuplot and should look something like this - if you apply a noisy sinewave to the input ;-) :


Stay tuned for a future blog post where I will implement this code and the IIR filter code on an XMOS xCORE :-)

If you have found this solution useful then please do hit the Google (+1) button so that others may be able to find it as well.
Numerix-DSP Libraries : http://www.numerix-dsp.com/eval/

DSP Tech Brief : How To Connect Digital Signal Processing Functions To An ADC And A DAC

In my earlier post I showed how to implement an Infinite Impulse Response (IIR) filter :
Let's Do Some More Digital Signal Processing - Infinite Impulse Response (IIR) Filters.

In this blog I will look at how to interface DSP functions (e.g. the SigLib DSP library functions) to an Analog to Digital Converter (ADC) or Digital to Analog Converter (DAC). For this blog I will use the IIR filter functions mentioned in the above blog entry.

The system configuration is shown in this diagram :



In order to handle the ADC/DAC data input and output (I/O) it is necessary to write an Interrupt Service Routine (ISR) that responds to the interrupt from the ADC/DAC.

Any computer with a soundcard can be used to demonstrate this however modern Operating Systems trap interrupts and use drivers to interface to the hardware. In order to demonstrate hardware interrupts it is necessary to write a virtual software interrupt generator using a call back function.
I am going to use the free, cross-platform open-source audio I/O library PortAudio to implement the I/O then I will call the callback function that we will use as our ISR.

On a typical embedded system it will not be necessary to use PortAudio, just hook your ISR function to the interrupt from your ADC/DAC and it should work in exactly the same way.

Here is the header file (analog_io.h.) Note that I have declared the input and output registers as global variables. I know this doesn't conform to any standards for structured programming however these are designed to represent the input and output registers of the ADC and DAC devices.

int analog_open (int SampleRate, void (*analog_isr) (void));
int analog_close (void);

volatile short adc_in0, adc_in1, dac_out0, dac_out1;

Here is the wrapper code (analog_io.c.) for simulating our ISR using PortAudio. Note the pointer to the ISR is passed to the function analog_open () along with the desired sample rate.


#include <stdio.h>
#include "portaudio.h"
#include "analog_io.h"

#define PA_SAMPLE_TYPE      paInt32
#define FRAMES_PER_BUFFER   (64)

void (*p_analog_isr) (void);

typedef int SAMPLE;
PaStream *stream;

static int paCallback( const void *inputBuffer, void *outputBuffer,
                       unsigned long framesPerBuffer,
                       const PaStreamCallbackTimeInfo* timeInfo,
                       PaStreamCallbackFlags statusFlags,
                       void *userData );

/* PortAudio callback */
static int paCallback( const void *inputBuffer, void *outputBuffer,
                       unsigned long framesPerBuffer,
                       const PaStreamCallbackTimeInfo* timeInfo,
                       PaStreamCallbackFlags statusFlags,
                       void *userData )
{
    SAMPLE *out = (SAMPLE*)outputBuffer;
    const SAMPLE *in = (const SAMPLE*)inputBuffer;
    unsigned int i;
    (void) timeInfo; /* Prevent unused variable warnings. */
    (void) statusFlags;
    (void) userData;

    if( inputBuffer == NULL )         // If input buffer empty then output silence
    {
        for( i=0; i<framesPerBuffer; i++ )
        {
            *out++ = 0;               /* Left */
            *out++ = 0;               /* Right */
        }
    }
    else                              // If input buffer NOT empty then process data
    {
        for( i=0; i<framesPerBuffer; i++ )
        {
          adc_in0 = (short)(*in++ >> 16);            /* Left */
          adc_in1 = (short)(*in++ >> 16);            /* Right */
          p_analog_isr ();
          *out++ = (int)(dac_out0 << 16);
          *out++ = (int)(dac_out1 << 16);
        }
    }
    return paContinue;
}


int analog_open (int SampleRate, void (*analog_isr) (void))
{
  p_analog_isr = analog_isr;

  PaStreamParameters inputParameters, outputParameters;
  PaError err;

  err = Pa_Initialize();
  if( err != paNoError ) goto error;

  inputParameters.device = Pa_GetDefaultInputDevice(); /* default input device */
  if (inputParameters.device == paNoDevice) {
    fprintf(stderr,"Error: No default input device.\n");
    goto error;
  }
  inputParameters.channelCount = 2;       /* stereo input */
  inputParameters.sampleFormat = PA_SAMPLE_TYPE;
  inputParameters.suggestedLatency = Pa_GetDeviceInfo( inputParameters.device )->defaultLowInputLatency;
  inputParameters.hostApiSpecificStreamInfo = NULL;

  outputParameters.device = Pa_GetDefaultOutputDevice(); /* default output device */
  if (outputParameters.device == paNoDevice) {
    fprintf(stderr,"Error: No default output device.\n");
    goto error;
  }
  outputParameters.channelCount = 2;       /* stereo output */
  outputParameters.sampleFormat = PA_SAMPLE_TYPE;
  outputParameters.suggestedLatency = Pa_GetDeviceInfo( outputParameters.device )->defaultLowOutputLatency;
  outputParameters.hostApiSpecificStreamInfo = NULL;

  err = Pa_OpenStream(&stream,
                      &inputParameters,
                      &outputParameters,
                      SampleRate,
                      FRAMES_PER_BUFFER,
                      0, /* paClipOff, */  /* we won't output out of range samples so don't bother clipping them */
                      paCallback,
                      NULL );
  if( err != paNoError ) goto error;

  err = Pa_StartStream( stream );
  if( err != paNoError ) goto error;

  return 0;

error:
  Pa_Terminate();
  fprintf( stderr, "An error occured while using the portaudio stream\n" );
  fprintf( stderr, "Error number: %d\n", err );
  fprintf( stderr, "Error message: %s\n", Pa_GetErrorText( err ) );
  return -1;
}

int analog_close (void)

{
  int err;

  err = Pa_CloseStream( stream );
  if( err != paNoError ) goto error;

  Pa_Terminate();
  return 0;

error:
    Pa_Terminate();
    fprintf( stderr, "An error occured while using the portaudio stream\n" );
    fprintf( stderr, "Error number: %d\n", err );
    fprintf( stderr, "Error message: %s\n", Pa_GetErrorText( err ) );
  return -1;
}

Here is the header file (iir.h.) for the IIR filtering functions :

void iir_init (double *pState,
  const int NumberOfStages);

double iir_filter (const double Source,
  double *pState,
  const double *pCoeffs,
  const int NumberOfStages);

And here are the IIR filtering functions (taken from the blog mentioned earlier)  (iir.c.) :

#include "iir.h"

void iir_init (double *pState,
  const int NumberOfStages)

{
  int  i;

      /* Initialise the filter state array to 0 */
  for (i = 0; i < (NumberOfStages * 2); i++)
  {
    *pState++ = 0.0;
  }

}   /* End of iir_init () */


double iir_filter (const double Source,
  double *pState,
  const double *pCoeffs,
  const int NumberOfStages)

{
  register double       FeedbackSumOfProducts;
  register int i;
  register double       TempInputData;

  TempInputData = Source;

  for (i = 0; i < NumberOfStages; i++)
  {
    FeedbackSumOfProducts = TempInputData - (*(pCoeffs+3) * *pState) -
        (*(pCoeffs+4) * *(pState+1));       /* Feedback */
    TempInputData = (*pCoeffs * FeedbackSumOfProducts) +
        (*(pCoeffs + 1) * *pState) +
        (*(pCoeffs + 2) * *(pState+1));     /* Feedforward and save result for next time round */
    
    *(pState+1) = *pState;                  /* Move delayed samples */
    *pState = FeedbackSumOfProducts;

    pState += 2;        /* Increment array pointers */
    pCoeffs += 5;
  }

  return (TempInputData);                     /* Save output */

}   /* End of iir_filter () */

Now all we need is the top level program (pa_filter.c) which includes the filter coefficients (generated using Digital Filter Plus http://www.numerix-dsp.com/dfplus/), the main () function to initialize the filters and the analog interface and we need the ISR to call the DSP functions when the interrupt occurs.

#include <stdio.h>
#include <math.h>
#include "analog_io.h"
#include "iir.h"

#define SAMPLE_RATE     44100
#define FILTER_STAGES   1           /* Single 2nd ordrer biquad */

// Fc = 1 kHz LPF
double LPFCoefficientArray [] =
{
3.43448050764769180000e-003, 6.86896101529538370000e-003, 3.43448050764769180000e-003,
-1.89857508921973460000e+000, 9.12313011250325380000e-001
};

// Fc = 1 kHz HPF
double HPFCoefficientArray [] =
{
9.32663817644617140000e-001, -1.86532763528923430000e+000, 9.32663817644617140000e-001,
-1.85191320077338670000e+000, 8.78746746701696060000e-001
};

double Chan0FilterState [2 * FILTER_STAGES], Chan1FilterState [2 * FILTER_STAGES];


void analog_isr (void)
{
  double analog_sample;

                            // Process channel 0
  analog_sample = (double)adc_in0;
  analog_sample = iir_filter (analog_sample, Chan0FilterState, LPFCoefficientArray, FILTER_STAGES);
  dac_out0 = (short)(analog_sample);

                            // Process channel 1
  analog_sample = (double)adc_in1;
  analog_sample = iir_filter (analog_sample, Chan1FilterState, HPFCoefficientArray, FILTER_STAGES);
  dac_out1 = (short)(analog_sample);

}

int main(void)
{
  int Error;

  iir_init (Chan0FilterState, FILTER_STAGES);
  iir_init (Chan1FilterState, FILTER_STAGES);

  Error = analog_open (SAMPLE_RATE, analog_isr);  // Open the analog interface
  if (Error == -1)
    return 1;

  printf("Hit ENTER to stop program.\n");
  getchar();

  Error = analog_close ();                        // Close the analog interface
  if (Error == -1)
    return 1;

  return 0;
}

Testing

Under Microsoft Visual Studio the code can be built using the following command (assuming you have already got a working PortAudio installation) :
cl pa_filter.c analog_io.c iir.c -DPA_USE_ASIO=1 portaudio_x86.lib

My laptop only supports a single microphone input to the ADC so to test the code I used an XMOS Multi-Function Audio (MFA) platform, shown in this image :



The XMOS xCORE processor on this board is more than capable of handling the signal processing code shown in this blog but for the purposes of the demonstration I am running the DSP code on the host.

If you run the code yourself you will hear the difference between the two channels after applying the Low-pass and High-pass filters.

If you have found this solution useful then please do hit the Google (+1) button so that others may be able to find it as well.
Numerix-DSP Libraries : http://www.numerix-dsp.com/eval/

Let's Do Some More Digital Signal Processing - Infinite Impulse Response (IIR) Filters


Following on from my earlier blog : Let's Do Some Digital Signal Processing (Part 2) - One Pole Filter I thought it was about time that I took a look at Infinite Impulse Response (IIR) filters.

IIR filters give much higher performance than Finite Impulse Response filters because they use feedback as well as feedforward in order to implement the filter response.

This code shows how to implement an IIR biquad filter structure.

#include <stdio.h>

#define COUNT 8  /* Number of samples to process */

#define FILTER_STAGES   1           /* Single 2nd ordrer biquad */

// Fs = 44.1 kHz, Fc = 1 kHz LPF
double LPFCoefficientArray [] =
{
3.43448050764769180000e-003, 6.86896101529538370000e-003, 3.43448050764769180000e-003,
-1.89857508921973460000e+000, 9.12313011250325380000e-001
};

float impulseSrc[] = {
 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
 };
float stepSrc[] = {
 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0
 };
float Dst[] = {
 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
 };

double FilterState [2 * FILTER_STAGES];

void iir_init (double *pState,
  const int NumberOfStages);

double iir_filter (const double Source,
  double *pState,
  const double *pCoeffs,
  const int NumberOfStages);

void main (void)

{
  int i;

  iir_init (FilterState, FILTER_STAGES);      /* Initialize the filter */
  for (i = 0; i < COUNT; i++)
    Dst[i] = iir_filter (impulseSrc[i], FilterState, LPFCoefficientArray, FILTER_STAGES);
  printf ("One Pole Impulse = %f, %f, %f, %f, %f, %f, %f, %f\n", Dst[0], Dst[1], Dst[2], Dst[3], Dst[4], Dst[5], Dst[6], Dst[7]);

  iir_init (FilterState, FILTER_STAGES);      /* Initialize the filter */
  for (i = 0; i < COUNT; i++)
    Dst[i] = iir_filter (stepSrc[i], FilterState, LPFCoefficientArray, FILTER_STAGES);
  printf ("One Pole Step = %f, %f, %f, %f, %f, %f, %f, %f\n", Dst[0], Dst[1], Dst[2], Dst[3], Dst[4], Dst[5], Dst[6], Dst[7]);

}


void iir_init (double *pState,
  const int NumberOfStages)

{
  int  i;

      /* Initialise the filter state array to 0 */
  for (i = 0; i < (NumberOfStages * 2); i++)
  {
    *pState++ = 0.0;
  }

}   /* End of iir_init () */


double iir_filter (const double Source,
  double *pState,
  const double *pCoeffs,
  const int NumberOfStages)

{
  register double       FeedbackSumOfProducts;
  register int i;
  register double       TempInputData;

  TempInputData = Source;

  for (i = 0; i < NumberOfStages; i++)
  {
    FeedbackSumOfProducts = TempInputData - (*(pCoeffs+3) * *pState) -
        (*(pCoeffs+4) * *(pState+1));       /* Feedback */
    TempInputData = (*pCoeffs * FeedbackSumOfProducts) +
        (*(pCoeffs + 1) * *pState) +
        (*(pCoeffs + 2) * *(pState+1));     /* Feedforward and save result for next time round */
    
    *(pState+1) = *pState;                  /* Move delayed samples */
    *pState = FeedbackSumOfProducts;

    pState += 2;        /* Increment array pointers */
    pCoeffs += 5;
  }

  return (TempInputData);                     /* Save output */

}   /* End of iir_filter () */


The result of running this code is :
One Pole Impulse = 0.003434, 0.013390, 0.025722, 0.036620, 0.046059, 0.054038, 0.060575, 0.065706
One Pole Step = 0.003434, 0.016824, 0.042546, 0.079167, 0.125226, 0.179264, 0.239839, 0.305545

The filter implemented in this example is a low-pass filter with 1 kHz cut-off frequency and a sampling rate of 44.1 kHz (CD frequency). This can be observed by modifying the above code to generate a longer impulse response (e.g. 1024) and then passing this data to a Fast Fourier Transform (FFT).

If you have found this solution useful then please do hit the Google (+1) button so that others may be able to find it as well.
Numerix-DSP Libraries : http://www.numerix-dsp.com/eval/

Wednesday 7 January 2015

Timing C Code Running On the XMOS xCORE Architecture

I recently had to show a customer how long a piece of code took to execute on the XMOS xCORE.

The nice thing about the xCORE is that it includes hardware timers that can be directly accessed from the application.

In order to be able to benchmark the code I knew there would be a small overhead to call the timers from the top level C code so my first task was to calculate this.

Having calculated the overhead I was now free to benchmark the actual code then remove the overhead to print the correct time.

As an aside, one of the great benefits of the XMOS architecture is how debug_printf's are implemented. Essentially the output of a printf can be routed over the inter-processor communications link called xCONNECT, in real-time. This allows debug_printf's to be located directly inside real-time code, unlike other microcontrollers where locating a printf inside an interrupt service routine is guaranteed to cause a run-time exception (AKA crash). Note, the code below uses regular printfs so that it can be compiled to run on any CPU, with suitable #defines/#ifs around the XMOS benchmarking code.

Here is the benchmarking code showing the performance of a simple piece of C code, with inline assembly:

#include <stdio.h>
#include "xbenchmark.h"

int main (void) 
{
  int start_time, end_time, overhead_time;
  overhead_time = xbench_init();

  start_time = xbench_get_time();
  asm ("nop");
  asm ("nop");
  end_time = xbench_get_time();
  
  printf ("Elapsed time = %d cycles\n", end_time - start_time - overhead_time);

  return (0);
}

Here is the xbenchmark.h file:

// XMOS Benchmarking Functions

int xbench_get_time(void);

int xbench_init(void);


Here is the xbenchmark.xc file:

// XMOS Benchmarking Functions

// Include files
#include <xs1.h>

timer t;

int xbench_get_time(void)
{
    int time;
    t :> time;
    return(time);
}

int xbench_init(void)
{
    int start_time, end_time;

    start_time = xbench_get_time();
    end_time = xbench_get_time();
    return(end_time - start_time);
}

Here is the command for compiling the C code:

xcc -O3 app.c xbenchmark.xc -target=XCORE-200-EXPLORER -o app.xe

And here is the output:

Elapsed time = 2 cycles

Phew, two nops takes two cycles :-)

Notes
XMOS use the incredible LLVM compiler, which I am sure will one day take over from GCC. It is almost never necessary to revert to assembly code but if you need to then you can see from the above example just how easy it is to use.
In order to read and write to the XMOS hardware (such as the timer, as shown above) it is necessary to use XC, rather than C, as shown above. The XC functions are documented in the XMOS Programming Guide : https://www.xmos.com/download/public/XMOS-Programming-Guide-(documentation)(E).pdf.

If you have found this solution useful then please do hit the Google (+1) button so that others may be able to find it as well.
Numerix-DSP Libraries : http://www.numerix-dsp.com/eval/

DSP Tech Brief : C Code To Implement The Q Format Number System

I recently posted an "Introduction To The Q Format Number System" : http://realgonegeek.blogspot.co.uk/2015/01/introduction-to-q-format-number-system.html.

This afternoon I was driving back from a meeting and thought I should put some code together to demonstrate the operations. Here it is (you will notice that I have used a somewhat more useful word length than 8 bits for this piece of code) :


#include <stdio.h>

double QFormatIntegerToFloat (const int x,
  const int n)

{
  return (((double)x) / ((double)(1 << n)));
}

int FloatToQFormatInteger (const double x,
  const int m,
  const int n)

{
  int   i;
  int   y;
  int   mask = 0;

  y = (int)(x * ((double)(1 << n)));  // Generate the integer number in m.n format

  for (i = 0; i < (m+n); i++)         // Calculate the mask to ensure we return the correct number of valid bits
  {
    mask = (mask << 1) + 1;
  }

  return (y & mask);
}

int MpyQFormatInteger (const int x,
  const int y,
  const int n)

{
  long long tmp;
  tmp = ((long long)x * (long long)y) >> n;
  return ((int)tmp);
}


int main() 
{
  int         a, b, c, d, e;


  printf ("%lf = 0x%x\n",  2.0 , FloatToQFormatInteger (2.0 , 8, 24));
  printf ("%lf = 0x%x\n",  1.5 , FloatToQFormatInteger (1.5 , 8, 24));
  printf ("%lf = 0x%x\n",  0.5 , FloatToQFormatInteger (0.5 , 8, 24));
  printf ("%lf = 0x%x\n",  0.25, FloatToQFormatInteger (0.25, 8, 24));
  printf ("%lf = 0x%x\n",  0.75, FloatToQFormatInteger (0.75, 8, 24));
  printf ("\n");

  a = FloatToQFormatInteger (0.75, 8, 24);
  b = FloatToQFormatInteger (2.0 , 8, 24);
  c = MpyQFormatInteger (a, b, 24);

  printf ("Result = 0x%x : %lf\n", c, QFormatIntegerToFloat (c , 24));

  d = FloatToQFormatInteger (0.5 , 8, 24);
  e = MpyQFormatInteger (c, d, 24);

  printf ("Result = 0x%x : %lf\n", e, QFormatIntegerToFloat (e , 24));

  return (0);
}

Here is the output :

2.000000 = 0x2000000
1.500000 = 0x1800000
0.500000 = 0x800000
0.250000 = 0x400000
0.750000 = 0xc00000

Result = 0x1800000 : 1.500000
Result = 0xc00000 : 0.750000

If you have found this solution useful then please do hit the Google (+1) button so that others may be able to find it as well.
Numerix-DSP Libraries : http://www.numerix-dsp.com/eval/