/*
 * Copyright (c) 2007 John DeHart and Washington University in St. Louis.
 * All rights reserved
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *    1. Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *    2. Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *    3. The name of the author or Washington University may not be used 
 *       to endorse or promote products derived from this source code 
 *       without specific prior written permission.
 *    4. Conditions of any other entities that contributed to this are also
 *       met. If a copyright notice is present from another entity, it must
 *       be maintained in redistributions of the source code.
 *
 * THIS INTELLECTUAL PROPERTY (WHICH MAY INCLUDE BUT IS NOT LIMITED TO SOFTWARE,
 * FIRMWARE, VHDL, etc) IS PROVIDED BY THE AUTHOR AND WASHINGTON UNIVERSITY 
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR WASHINGTON UNIVERSITY 
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
 * ARISING IN ANY WAY OUT OF THE USE OF THIS INTELLECTUAL PROPERTY, EVEN IF 
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * */
/*
 * File: plugin_dl.c  
 * Author: Charlie Wiseman
 * Email: cgw1@arl.wustl.edu
 * Organization: Applied Research Laboratory
 * 
 * Derived from: dl_source.c (for ONL router)
 *
 * Date Created: 8/20/2007 
 * 
 * Description: Dispatch loop functionality tailored for plugins.
 *
 * Modification History:
 */

#ifndef _PLUGIN_DL_C
#define _PLUGIN_DL_C

#include "dl_system.h"
#include "system_init.h"
#include "dl_buf.c"
#include "dl_meta.h"
#include "scratch_rings_WU.h"
#include "sram_rings_WU.h"
#include "ring_formats.h"

#include "plugin_dl.h"

/* next two needed for using SRAM rings:
 * the ring helpers in this file build a ring address as
 *   (SRAM_CONTROL_RING_CHANNEL << QDESC_CHANNEL_BITPOS) | (ring_number << 2)
 */
#define SRAM_CONTROL_RING_CHANNEL 3
#define QDESC_CHANNEL_BITPOS 30

/* Signals used to pass the "turn" between packet threads when ordered
 * sinking/sourcing is compiled in. */
#ifdef DL_ORDERED
  SIGNAL dl_sink_packet_sig;
  SIGNAL dl_source_packet_sig;
#endif

/* Values stored in the two message locks below. */
#define UNLOCKED 0
#define LOCKED   1
/* Polled locks used by dl_sink_message() and dl_source_message() so that only
 * one thread at a time works on a control message ring. */
__declspec(shared local_mem) unsigned int dl_sink_message_lock;
__declspec(shared local_mem) unsigned int dl_source_message_lock;

/* SRAM ring helpers defined later in this file. */
void sram_ring_get_buffer_1word(unsigned int,__declspec(gp_reg, aligned(4)) unsigned int *);
void sram_ring_get_buffer_nwords(unsigned int,__declspec(gp_reg, aligned(4)) unsigned int *, unsigned int);
void sram_ring_put_buffer_nwords(unsigned int,__declspec(gp_reg, aligned(4)) unsigned int *, unsigned int);

/* Ordered-execution helpers defined at the end of this file. */
static void start_packet_rotation(SIGNAL *);
static void wait_packet_signal(SIGNAL *);
static void send_packet_signal(SIGNAL *);

/* Packet ring data defined elsewhere: dl_source_packet() fills ring_in and
 * dl_sink_packet() sends ring_out. */
extern plc_plugin_data ring_in;
extern plugin_out_data ring_out;

//-------------------------------------------------------------------
// dl_sink_init
//
//    Description:
//      Fix the value of the incoming ring-ready signal so RX can indicate when 
//      all rings are created. Start the signalling rotation for dl_sink.
//
//    Parameters:
//      Outputs: n/a
//      In/Outs: n/a
//      Inputs: n/a
//      Constants: n/a
//      Labels: n/a
//
//    Side effects: n/a
//
//    See also: n/a
//
void dl_sink_init()
{
  const int this_ctx = ctx();

  // The one-time setup below is performed by context 0 only.
  if(this_ctx == 0)
  {
    // Pin rx_ring_ready_sig to ME_INIT_SIGNAL so that the rx task
    // can indicate when that ring is created.
    __assign_relative_register((void *)&rx_ring_ready_sig, ME_INIT_SIGNAL);

    // The sink-side message lock starts out free.
    dl_sink_message_lock = UNLOCKED;
  }

#ifdef DL_ORDERED
  // Called by every context; only FIRST_PACKET_THREAD actually sends
  // the initial signal (see start_packet_rotation).
  start_packet_rotation(&dl_sink_packet_sig);
#endif
}

//-------------------------------------------------------------------
// dl_source_init
//
//    Description:
//              Wait for the RX to signal creation of
//              the rings, and then enter the signalling rotation for dl_source
//
//    Parameters:
//      Outputs: n/a
//      In/Outs: n/a
//      Inputs: n/a
//      Constants: n/a
//      Labels: n/a
//
//    Side effects: n/a
//
//    See also: n/a
//
void dl_source_init()
{
  const int this_ctx = ctx();

  // The one-time setup below is performed by context 0 only.
  if(this_ctx == 0)
  {
    // Block until RX raises rx_ring_ready_sig, i.e. the incoming ring is ready.
    wait_for_all(&rx_ring_ready_sig);

    // The source-side message lock starts out free.
    dl_source_message_lock = UNLOCKED;
  }

#ifdef DL_ORDERED
  // Called by every context; only FIRST_PACKET_THREAD actually sends
  // the initial signal (see start_packet_rotation).
  start_packet_rotation(&dl_source_packet_sig);
#endif
}

///////////////////////////////////////////////////////////////////////////////
// dl_sink_packet:
//    Description:
//      Enqueue a packet from current processing block to the next processing
//		block, depending on which microengine
//		is running this.
//      dl_sink is called multiple times when multiple packets need to be enqueued.
//      dl_sink supports ordered thread execution if DL_ORDERED is defined. The enqueue
//              process is considered a critical section and only one thread can
//              be enqueuing packets at a time. After the thread enqueues its last
//              packet, it calls exit_block() to pass the signal to the next thread. If
//              the current packet is the first one to be enqueued, enter_block()
//              is called to wait for the signal from the previous thread.
//
//    Parameters:
//      Outputs: n/a
//      In/Outs: n/a
//      Inputs: 
//              unsigned int sink:  where to sink packet to
//      Constants: n/a
//      Labels: n/a
//
//    Side effects: n/a
void dl_sink_packet(unsigned int sink)
{
#ifdef DL_ORDERED
  // Block until it is this thread's turn to enqueue.
  wait_packet_signal(&dl_sink_packet_sig);
#endif

  // Every destination is fed from ring_out.i; only the target ring and the
  // put routine used differ from case to case.
  switch(sink)
  {
    case QM:
    {
      scr_ring_put_buffer_3word(PLUGIN_TO_QM_RING, ring_out.i, 0);
      break;
    }
    case MUX:
    {
      sram_ring_put_buffer_3word(PLUGINS_TO_MUX_SRAM_RING, ring_out.i, 0);
      break;
    }
    case XSCALE_LD:
    {
      scr_ring_put_buffer_6word(TO_XSCALE_LD_RING, ring_out.i, 0);
      break;
    }
    case XSCALE_EXC:
    {
      scr_ring_put_buffer_6word(TO_XSCALE_EXC_RING, ring_out.i, 0);
      break;
    }
    case XSCALE_ERR:
    {
      scr_ring_put_buffer_6word(TO_XSCALE_ERR_RING, ring_out.i, 0);
      break;
    }
    case PACKET_IN_RING_0:
    {
      sram_ring_put_buffer_6word(PLC_TO_PLUGIN_0_SRAM_RING, ring_out.i, 0);
      break;
    }
    case PACKET_IN_RING_1:
    {
      sram_ring_put_buffer_6word(PLC_TO_PLUGIN_1_SRAM_RING, ring_out.i, 0);
      break;
    }
    case PACKET_IN_RING_2:
    {
      sram_ring_put_buffer_6word(PLC_TO_PLUGIN_2_SRAM_RING, ring_out.i, 0);
      break;
    }
    case PACKET_IN_RING_3:
    {
      sram_ring_put_buffer_6word(PLC_TO_PLUGIN_3_SRAM_RING, ring_out.i, 0);
      break;
    }
    case PACKET_IN_RING_4:
    {
      sram_ring_put_buffer_6word(PLC_TO_PLUGIN_4_SRAM_RING, ring_out.i, 0);
      break;
    }
    case DROP: // drop the packet
    {
      scr_ring_put_buffer_1word(DL_DROP_RING, ring_out.i, 0);
      break;
    }
    default: // assume sink == DO_NOTHING
    {
      break;
    }
  }

#ifdef DL_ORDERED
  // Hand the turn to the next packet thread.
  send_packet_signal(&dl_sink_packet_sig);
#endif
}

///////////////////////////////////////////////////////////////////////////////
// dl_source_packet:
// Description:
//      Dequeue a packet from previous processing block, depending on which microengine
//		is running this.
//      dl_source is called once to dequeue only one packet from a specified ring.
//      dl_source supports ordered thread execution if DL_ORDERED is defined. Dequeue
//              process is considered as a critical session and only one thread can
//              be dequeuing packets at a time. 
//
//    Parameters:
//      Outputs: n/a
//      In/Outs: n/a
//      Inputs: unsigned int source - id of the previous block from which a packet is to be received.
//      Constants: n/a
//      Labels: n/a
//
//    Side effects: n/a
void dl_source_packet(unsigned int source)
{
  __declspec(gp_reg) unsigned int sring;

#ifdef DL_ORDERED
  wait_packet_signal(&dl_source_packet_sig);
#endif

  if(source == PACKET_IN_RING_0)
  {
    sring = PLC_TO_PLUGIN_0_SRAM_RING;
  }
  else if(source == PACKET_IN_RING_1)
  {
    sring = PLC_TO_PLUGIN_1_SRAM_RING;
  }
  else if(source == PACKET_IN_RING_2)
  {
    sring = PLC_TO_PLUGIN_2_SRAM_RING;
  }
  else if(source == PACKET_IN_RING_3)
  {
    sring = PLC_TO_PLUGIN_3_SRAM_RING;
  }
  else if(source == PACKET_IN_RING_4)
  {
    sring = PLC_TO_PLUGIN_4_SRAM_RING;
  }
  else 
  {
    return;
  }

  sram_ring_get_buffer_6word(sring, ring_in.i);
  while (ring_in.i[0] == 0)
  {
      ctx_swap();
      sram_ring_get_buffer_6word(sring, ring_in.i);
  }

#ifdef DL_ORDERED
  send_packet_signal(&dl_source_packet_sig);
#endif
}

///////////////////////////////////////////////////////////////////////////////
// dl_sink_message:
// Description:
//      Enqueue a control message to XScale.  A local memory variable
//      (dl_sink_message_lock) is used to ensure only one thread is writing
//      a message at any one time.
//      The number of words written is taken from the message itself:
//      ((msg[0] >> 16) & 0xFF) + 1.
//      An unrecognised 'sink' is ignored; the function returns without
//      touching the lock or any ring.
//
//    Parameters:
//      Outputs: n/a
//      In/Outs: n/a
//      Inputs: unsigned int sink - one of MESSAGE_OUT_RING_0..MESSAGE_OUT_RING_4,
//                                  selecting the ring the message is written to.
//              unsigned int *msg - the message words; msg[0] carries the
//                                  length field described above.
//      Constants: n/a
//      Labels: n/a
//
//    Side effects: n/a
void dl_sink_message(unsigned int sink, __declspec(gp_reg, aligned(4)) unsigned int *msg)
{
  __declspec(gp_reg) unsigned int sring;

  // Resolve the ring BEFORE taking the lock, so that the early return for an
  // unknown sink cannot leave the lock held (which would make every later
  // caller spin forever).
  if(sink == MESSAGE_OUT_RING_0)
  {
    sring = ONL_PLUGIN_0_TO_XSCALE_CTRL_SRAM_RING;
  }
  else if(sink == MESSAGE_OUT_RING_1)
  {
    sring = ONL_PLUGIN_1_TO_XSCALE_CTRL_SRAM_RING;
  }
  else if(sink == MESSAGE_OUT_RING_2)
  {
    sring = ONL_PLUGIN_2_TO_XSCALE_CTRL_SRAM_RING;
  }
  else if(sink == MESSAGE_OUT_RING_3)
  {
    sring = ONL_PLUGIN_3_TO_XSCALE_CTRL_SRAM_RING;
  }
  else if(sink == MESSAGE_OUT_RING_4)
  {
    sring = ONL_PLUGIN_4_TO_XSCALE_CTRL_SRAM_RING;
  }
  else
  {
    return;
  }

  // only proceed once the lock is available
  while(dl_sink_message_lock == LOCKED)
  {
    ctx_swap();
  }
  dl_sink_message_lock = LOCKED;

  // Write the whole message: the count in bits 23:16 of msg[0], plus one.
  sram_ring_put_buffer_nwords(sring, &msg[0], (((msg[0]>>16) & 0xFF)+1));

  dl_sink_message_lock = UNLOCKED;
}

///////////////////////////////////////////////////////////////////////////////
// dl_source_message:
// Description:
//      Dequeue a control message from XScale.  A local memory variable
//      (dl_source_message_lock) is used to ensure only one thread is reading
//      a message at any one time.
//      The first word is polled (swapping out between attempts) until it is
//      non-zero; ((msg[0] >> 16) & 0xFF) further words are then read into
//      msg[1] onwards.
//      An unrecognised 'source' is ignored; the function returns without
//      touching the lock, any ring, or msg.
//
//    Parameters:
//      Outputs: unsigned int *msg - receives the message words.
//               NOTE(review): the caller's buffer must be able to hold
//               1 + ((msg[0] >> 16) & 0xFF) words; no bound is checked here.
//      In/Outs: n/a
//      Inputs: unsigned int source - one of MESSAGE_IN_RING_0..MESSAGE_IN_RING_4,
//                                    selecting the ring the message is read from.
//      Constants: n/a
//      Labels: n/a
//
//    Side effects: n/a
void dl_source_message(unsigned int source, __declspec(gp_reg) unsigned int *msg)
{
  __declspec(gp_reg) unsigned int sring;

  // Resolve the ring BEFORE taking the lock, so that the early return for an
  // unknown source cannot leave the lock held (which would make every later
  // caller spin forever).
  if(source == MESSAGE_IN_RING_0)
  {
    sring = ONL_XSCALE_TO_PLUGIN_0_CTRL_SRAM_RING;
  }
  else if(source == MESSAGE_IN_RING_1)
  {
    sring = ONL_XSCALE_TO_PLUGIN_1_CTRL_SRAM_RING;
  }
  else if(source == MESSAGE_IN_RING_2)
  {
    sring = ONL_XSCALE_TO_PLUGIN_2_CTRL_SRAM_RING;
  }
  else if(source == MESSAGE_IN_RING_3)
  {
    sring = ONL_XSCALE_TO_PLUGIN_3_CTRL_SRAM_RING;
  }
  else if(source == MESSAGE_IN_RING_4)
  {
    sring = ONL_XSCALE_TO_PLUGIN_4_CTRL_SRAM_RING;
  }
  else
  {
    return;
  }

  // only proceed once the lock is available
  while(dl_source_message_lock == LOCKED)
  {
    ctx_swap();
  }
  dl_source_message_lock = LOCKED;

  // Poll for the first word of a message, yielding between attempts.
  sram_ring_get_buffer_1word(sring, &msg[0]);
  while(msg[0] == 0)
  {
    ctx_swap();
    sram_ring_get_buffer_1word(sring, &msg[0]);
  }

  // Fetch the remaining words; their count is in bits 23:16 of msg[0].
  sram_ring_get_buffer_nwords(sring, &msg[1], ((msg[0]>>16) & 0xFF));

  dl_source_message_lock = UNLOCKED;
}

//-------------------------------------------------------------------
// sram_ring_get_buffer_1word
//
//    Description:
//       Dequeue 1 word from the given SRAM ring.
//
void sram_ring_get_buffer_1word(unsigned int ring_number, __declspec(gp_reg, aligned(4)) unsigned int* in)
{
  __declspec(sram_read_reg) unsigned int xfer[1];
  __declspec(sram) void* ring_addr;
  SIGNAL get_done;

  // Ring address: control-ring channel in the top bits, ring number shifted
  // left by two below it.
  ring_addr = (__declspec(sram) void *)
       ((SRAM_CONTROL_RING_CHANNEL<<QDESC_CHANNEL_BITPOS) | (ring_number<<2));

  // Read a single word, swapping out until the read has completed.
  sram_get_ring(xfer, ring_addr, 1, ctx_swap, &get_done);

  // Hand the word back to the caller.
  in[0] = xfer[0];
}

//-------------------------------------------------------------------
// sram_ring_get_buffer_nwords
//
//    Description:
//       Dequeue n words from the given SRAM ring.
//
// Reads n words from SRAM ring 'ring_number' into in[0..n-1], in chunks of
// at most 8 words (the size of the local read-transfer array). Does nothing
// when n is 0.
// NOTE(review): the words are copied out without checking whether the ring
// actually held that many entries -- confirm callers guarantee this.
void sram_ring_get_buffer_nwords(unsigned int ring_number, __declspec(gp_reg, aligned(4)) unsigned int* in, unsigned int n)
{
  unsigned int i;
  SIGNAL ring_signal;
  __declspec(sram_read_reg) unsigned int data[8];

  // Ring address: control-ring channel in the top bits, ring number shifted
  // left by two below it.
  __declspec(sram) void* ring_addr =
       (__declspec(sram) void *) ((SRAM_CONTROL_RING_CHANNEL<<QDESC_CHANNEL_BITPOS) | (ring_number<<2));

  if(n < 1)
  {
    return;
  }

  // Full 8-word chunks while more than 8 words remain; on exit 1 <= n <= 8.
  // The copies are written out one by one rather than as a loop --
  // presumably because the transfer registers cannot be indexed at run
  // time; confirm before converting to a loop.
  i = 0;
  while(n > 8)
  {
    sram_get_ring(data, ring_addr, 8, ctx_swap, &ring_signal);
    in[i] = data[0];
    in[i+1] = data[1];
    in[i+2] = data[2];
    in[i+3] = data[3];
    in[i+4] = data[4];
    in[i+5] = data[5];
    in[i+6] = data[6];
    in[i+7] = data[7];
    i+=8;
    n-=8;
  }

  // Final chunk of the remaining n (1..8) words.
  sram_get_ring(data, ring_addr, n, ctx_swap, &ring_signal);

  // Copy out exactly n words of the final chunk.
  if(n > 0) { in[i++] = data[0]; }
  if(n > 1) { in[i++] = data[1]; }
  if(n > 2) { in[i++] = data[2]; }
  if(n > 3) { in[i++] = data[3]; }
  if(n > 4) { in[i++] = data[4]; }
  if(n > 5) { in[i++] = data[5]; }
  if(n > 6) { in[i++] = data[6]; }
  if(n > 7) { in[i++] = data[7]; }
}

//-------------------------------------------------------------------
// sram_ring_put_buffer_nwords
//
//    Description:
//       Enqueue n words on the given SRAM ring.
//       For now, also assume that thread should always try to enqueue the packet
//       repeatedly if the ring is full.
//
// Writes in[0..n-1] to SRAM ring 'ring_number' in chunks of at most 8 words
// (the size of the local write-transfer array). Each chunk is re-issued until
// the returned status has a bit set in its top nibble. Does nothing when n
// is 0.
// NOTE(review): a message longer than 8 words is written as several separate
// puts; confirm that callers serialise writers to the same ring.
void sram_ring_put_buffer_nwords(unsigned int ring_number, __declspec(gp_reg, aligned(4)) unsigned int* in, unsigned int n)
{
  int i;
  SIGNAL_PAIR ring_signal;
  __declspec(sram_write_reg) unsigned int data[8];

  // The compiler is associating the size of the put data with the size of the returned status
  __declspec(sram_read_reg) unsigned int status[8];

  // Ring address: control-ring channel in the top bits, ring number shifted
  // left by two below it.
  __declspec(sram) void* ring_addr =
       (__declspec(sram) void *) ((SRAM_CONTROL_RING_CHANNEL<<QDESC_CHANNEL_BITPOS) | (ring_number<<2));

  if(n < 1)
  {
    return;
  }

  // Full 8-word chunks while more than 8 words remain; on exit 1 <= n <= 8.
  // The copies are written out one by one rather than as a loop --
  // presumably because the transfer registers cannot be indexed at run
  // time; confirm before converting to a loop.
  i = 0;
  while(n > 8)
  {
    data[0] = in[i];
    data[1] = in[i+1];
    data[2] = in[i+2];
    data[3] = in[i+3];
    data[4] = in[i+4];
    data[5] = in[i+5];
    data[6] = in[i+6];
    data[7] = in[i+7];
    i+=8;
    n-=8;

    // Retry the same chunk until status[0] has a bit set in 31:28.
    // NOTE(review): presumably a zero top nibble means the ring was full
    // and the put was rejected -- confirm against the hardware reference.
    do
    {
      sram_put_ring(&status[0], data, ring_addr, 8, sig_done, &ring_signal);
      wait_for_all(&ring_signal);
    }
    while(!(status[0] & 0xf0000000));
  }

  // Stage exactly the remaining n (1..8) words of the final chunk.
  if(n > 0) { data[0] = in[i++]; }
  if(n > 1) { data[1] = in[i++]; }
  if(n > 2) { data[2] = in[i++]; }
  if(n > 3) { data[3] = in[i++]; }
  if(n > 4) { data[4] = in[i++]; }
  if(n > 5) { data[5] = in[i++]; }
  if(n > 6) { data[6] = in[i++]; }
  if(n > 7) { data[7] = in[i++]; }

  // Same retry-until-accepted loop for the final chunk.
  do
  {
    sram_put_ring(&status[0], data, ring_addr, n, sig_done, &ring_signal);
    wait_for_all(&ring_signal);
  }
  while(!(status[0] & 0xf0000000));
}

// Seeds the ordered-execution rotation for signal 's'. Only
// FIRST_PACKET_THREAD sends anything; for every other context this is a no-op.
// With a single packet thread the signal goes to that thread itself,
// otherwise it goes to the context after FIRST_PACKET_THREAD.
static __forceinline void start_packet_rotation(SIGNAL *s)
{
  if(ctx() != FIRST_PACKET_THREAD)
  {
    return;
  }

#if ( (FIRST_PACKET_THREAD) == (LAST_PACKET_THREAD) )
  signal_same_ME(__signal_number(s), FIRST_PACKET_THREAD);
#else
  signal_same_ME(__signal_number(s), (FIRST_PACKET_THREAD+1));
#endif

  // Tell the compiler that 's' is written as a side effect of the call above.
  __implicit_write(s);
}

// Blocks the calling thread until signal 's' has been received, i.e. until
// another thread (or start_packet_rotation) has passed it the turn.
static __forceinline void wait_packet_signal(SIGNAL *s)
{
  wait_for_all(s);
}

// Passes signal 's' on to the next packet thread in the rotation:
//   - a thread in [FIRST_PACKET_THREAD, LAST_PACKET_THREAD) signals the next
//     context on this microengine,
//   - LAST_PACKET_THREAD wraps around and signals FIRST_PACKET_THREAD,
//   - with a single packet thread, that thread signals itself.
// Contexts outside the packet-thread range send nothing.
static __forceinline void send_packet_signal(SIGNAL *s)
{
  const int me = ctx();

#if ( (FIRST_PACKET_THREAD) == (LAST_PACKET_THREAD) )
  if(me != FIRST_PACKET_THREAD)
  {
    return;
  }
  signal_same_ME(__signal_number(s), FIRST_PACKET_THREAD);
#else
  if(me == LAST_PACKET_THREAD)
  {
    // End of the rotation: wrap around to the first packet thread.
    signal_same_ME(__signal_number(s), FIRST_PACKET_THREAD);
  }
  else if(me >= FIRST_PACKET_THREAD && me < LAST_PACKET_THREAD)
  {
    signal_same_ME_next_ctx(__signal_number(s));
  }
  else
  {
    return;
  }
#endif

  // Tell the compiler that 's' is written as a side effect of the call above.
  __implicit_write(s);
}

#endif /* _PLUGIN_DL_C */
