/*
 * Copyright (c) 2007 John DeHart and Washington University in St. Louis.
 * All rights reserved
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *    1. Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *    2. Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *    3. The name of the author or Washington University may not be used 
 *       to endorse or promote products derived from this source code 
 *       without specific prior written permission.
 *    4. Conditions of any other entities that contributed to this are also
 *       met. If a copyright notice is present from another entity, it must
 *       be maintained in redistributions of the source code.
 *
 * THIS INTELLECTUAL PROPERTY (WHICH MAY INCLUDE BUT IS NOT LIMITED TO SOFTWARE,
 * FIRMWARE, VHDL, etc) IS PROVIDED BY THE AUTHOR AND WASHINGTON UNIVERSITY 
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR WASHINGTON UNIVERSITY 
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
 * ARISING IN ANY WAY OUT OF THE USE OF THIS INTELLECTUAL PROPERTY, EVEN IF 
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * */
/*
 * File: plugin_dl.c  
 * Author: Charlie Wiseman
 * Email: cgw1@arl.wustl.edu
 * Organization: Applied Research Laboratory
 * 
 * Derived from: dl_source.c (for ONL router)
 *
 * Date Created: 8/20/2007 
 * 
 * Description: Dispatch loop functionality tailored for plugins.
 *
 * Modification History:
 */

#ifndef _PLUGIN_DL_C
#define _PLUGIN_DL_C

#include "dl_system.h"
#include "system_init.h"
#include "dl_buf.c"
#include "dl_meta.h"
#include "scratch_rings_WU.h"
#include "sram_rings_WU.h"
#include "ring_formats.h"
#include "string.h"

#include "plugin_dl.h"

/* next two needed for using SRAM rings */
/*
#define SRAM_CONTROL_RING_CHANNEL 3
#define QDESC_CHANNEL_BITPOS 30

#ifdef DL_ORDERED
  SIGNAL dl_sink_packet_sig;
  SIGNAL dl_source_packet_sig;
#endif

#define UNLOCKED 0
#define LOCKED   1
__declspec(shared local_mem) unsigned int dl_sink_message_lock;
__declspec(shared local_mem) unsigned int dl_source_message_lock;
*/

//void sram_ring_get_buffer_1word(unsigned int,__declspec(gp_reg, aligned(4)) unsigned int *);
unsigned int sram_ring_get_buffer_1word(unsigned int);
//void sram_ring_get_buffer_nwords(unsigned int,__declspec(gp_reg, aligned(4)) unsigned int *, unsigned int);
//void sram_ring_put_buffer_nwords(unsigned int,__declspec(gp_reg, aligned(4)) unsigned int *, unsigned int);

static void start_packet_rotation(SIGNAL *);
static void wait_packet_signal(SIGNAL *);
static void send_packet_signal(SIGNAL *);

extern plc_plugin_data ring_in;
extern plugin_out_data ring_out;

//-------------------------------------------------------------------
// onl_api_debug_message
//
//    Description:
//      Build an 8-word CM_DEBUGMSG control message and pass it to
//      dl_sink_message() for the given sink. Word 0 is the control
//      header (num_words = 7, no response requested, mid = 0); words
//      1 through 7 are filled in from msg by onl_api_str2intarr().
//
//    Parameters:
//      Inputs:
//              unsigned int sink: forwarded unchanged to dl_sink_message()
//              char *msg:         string handed to onl_api_str2intarr()
//
//    Returns: 0 once the message has been handed to dl_sink_message(),
//             -1 if onl_api_str2intarr() returns a negative value (nothing
//             is sent in that case).
//
int onl_api_debug_message(unsigned int sink, __declspec(local_mem) char *msg)
{
  __declspec(gp_reg) unsigned int words[8];
  __declspec(gp_reg) onl_api_ctrl_msg_hdr hdr;

  // Word 0: the control message header.
  hdr.response_requested = 0;
  hdr.type = CM_DEBUGMSG;
  hdr.num_words = 7;
  hdr.mid = 0;
  words[0] = hdr.value;

  // Words 1..7: the converted string.
  // NOTE(review): presumably onl_api_str2intarr() fails when msg does not
  // fit in the 7 remaining words -- confirm against its definition.
  if (onl_api_str2intarr(msg, &words[1]) >= 0) {
    dl_sink_message(sink, words);
    return 0;
  }

  return -1;
}

//-------------------------------------------------------------------
// dl_sink_init
//
//    Description:
//      Initialisation for the sink side of the dispatch loop.
//      Context 0 fixes rx_ring_ready_sig to ME_INIT_SIGNAL, so that RX can
//      indicate when the incoming ring has been created, and marks
//      dl_sink_message_lock as UNLOCKED.
//      When DL_ORDERED is defined, every calling thread then calls
//      start_packet_rotation() for dl_sink_packet_sig.
//
//    Parameters:
//      Outputs: n/a
//      In/Outs: n/a
//      Inputs: n/a
//      Constants: n/a
//      Labels: n/a
//
//    Side effects: writes dl_sink_message_lock (context 0 only)
//
//    See also: dl_source_init
//
void dl_sink_init()
{
  if (ctx() == 0) {
    // Pin the ring-ready signal to a known signal number so the rx task
    // can raise it once the ring exists.
    __assign_relative_register((void *)&rx_ring_ready_sig, ME_INIT_SIGNAL);

    // No sink message is in flight yet.
    dl_sink_message_lock = UNLOCKED;
  }

#ifdef DL_ORDERED
  start_packet_rotation(&dl_sink_packet_sig);
#endif
}

//-------------------------------------------------------------------
// dl_source_init
//
//    Description:
//      Initialisation for the source side of the dispatch loop.
//      Context 0 waits on rx_ring_ready_sig (raised by RX once the incoming
//      ring is ready) and then marks dl_source_message_lock as UNLOCKED.
//      When DL_ORDERED is defined, every calling thread then calls
//      start_packet_rotation() for dl_source_packet_sig.
//
//    Parameters:
//      Outputs: n/a
//      In/Outs: n/a
//      Inputs: n/a
//      Constants: n/a
//      Labels: n/a
//
//    Side effects: writes dl_source_message_lock (context 0 only)
//
//    See also: dl_sink_init
//
void dl_source_init()
{
  if (ctx() == 0) {
    // Block until RX reports that the incoming ring is ready.
    wait_for_all(&rx_ring_ready_sig);

    // No source message read is in progress yet.
    dl_source_message_lock = UNLOCKED;
  }

#ifdef DL_ORDERED
  start_packet_rotation(&dl_source_packet_sig);
#endif
}

///////////////////////////////////////////////////////////////////////////////
// dl_sink_packet:
//    Description:
//      Enqueue the contents of ring_out.i onto the ring selected by sink.
//      Each sink has its own ring and its own put routine (1, 3, 5 or 6
//      words, scratch or SRAM). The MUX sink additionally increments the
//      SCR_PLUGINS_TO_MUX_SRAM_RING_OCC_CNTR scratch counter after the put.
//      Any sink value not listed below (e.g. DO_NOTHING) enqueues nothing.
//
//      dl_sink_packet is called once per packet to be enqueued.
//
//      If DL_ORDERED is defined, the enqueue is treated as a critical
//      section: the thread first waits for dl_sink_packet_sig and, once the
//      put has been issued, passes the signal on with send_packet_signal().
//
//    Parameters:
//      Outputs: n/a
//      In/Outs: n/a
//      Inputs:
//              unsigned int sink:  where to sink packet to
//      Constants: n/a
//      Labels: n/a
//
//    Side effects: reads the global ring_out
void dl_sink_packet(unsigned int sink)
{
#ifdef DL_ORDERED
  wait_packet_signal(&dl_sink_packet_sig);
#endif

  if (sink == QM) {
    scr_ring_put_buffer_3word(PLUGIN_TO_QM_RING, ring_out.i, 0);
  } else if (sink == MUX) {
    sram_ring_put_buffer_3word(PLUGINS_TO_MUX_SRAM_RING, ring_out.i, 0);
    scratch_incr((void*)SCR_PLUGINS_TO_MUX_SRAM_RING_OCC_CNTR);
  } else if (sink == XSCALE_LD) {
    scr_ring_put_buffer_6word(TO_XSCALE_LD_RING, ring_out.i, 0);
  } else if (sink == XSCALE_EXC) {
    scr_ring_put_buffer_6word(TO_XSCALE_EXC_RING, ring_out.i, 0);
  } else if (sink == XSCALE_ERR) {
    scr_ring_put_buffer_6word(TO_XSCALE_ERR_RING, ring_out.i, 0);
  } else if (sink == PACKET_IN_RING_0) {
    sram_ring_put_buffer_6word(PLC_TO_PLUGIN_0_SRAM_RING, ring_out.i, 0);
  } else if (sink == PACKET_IN_RING_1) {
    sram_ring_put_buffer_6word(PLC_TO_PLUGIN_1_SRAM_RING, ring_out.i, 0);
  } else if (sink == PACKET_IN_RING_2) {
    sram_ring_put_buffer_6word(PLC_TO_PLUGIN_2_SRAM_RING, ring_out.i, 0);
  } else if (sink == PACKET_IN_RING_3) {
    sram_ring_put_buffer_6word(PLC_TO_PLUGIN_3_SRAM_RING, ring_out.i, 0);
  } else if (sink == PACKET_IN_RING_4) {
    sram_ring_put_buffer_6word(PLC_TO_PLUGIN_4_SRAM_RING, ring_out.i, 0);
  } else if (sink == DROP) {
    // drop the packet
    scr_ring_put_buffer_1word(DL_DROP_RING, ring_out.i, 0);
  } else if (sink == TX) {
    scr_ring_put_buffer_5word(HF_TO_TX_RING, ring_out.i, 0);
  }
  // Anything else is assumed to be DO_NOTHING: no ring is written.

#ifdef DL_ORDERED
  send_packet_signal(&dl_sink_packet_sig);
#endif
}

///////////////////////////////////////////////////////////////////////////////
// dl_source_packet:
// Description:
//      Dequeue one packet descriptor (6 words) from the SRAM ring selected
//      by source into ring_in.i. The ring is polled, with a ctx_swap()
//      between attempts, until the first word read back is non-zero.
//
//      dl_source_packet is called once to dequeue only one packet from the
//      specified ring. An unrecognised source dequeues nothing and leaves
//      ring_in untouched.
//
//      If DL_ORDERED is defined, the dequeue is treated as a critical
//      section: the thread waits for dl_source_packet_sig before touching
//      the ring and passes the signal on with send_packet_signal() when it
//      is done. The signal is passed on for an unrecognised source as well,
//      so that one bad call cannot stall the other threads in the rotation.
//
//    Parameters:
//      Outputs: n/a
//      In/Outs: n/a
//      Inputs: unsigned int source - id of the ring from which a packet is
//              to be received (PACKET_IN_RING_0 .. PACKET_IN_RING_4).
//      Constants: n/a
//      Labels: n/a
//
//    Side effects: writes the global ring_in
void dl_source_packet(unsigned int source)
{
  __declspec(gp_reg) unsigned int sring;

#ifdef DL_ORDERED
  wait_packet_signal(&dl_source_packet_sig);
#endif

  if(source == PACKET_IN_RING_0)
  {
    sring = PLC_TO_PLUGIN_0_SRAM_RING;
  }
  else if(source == PACKET_IN_RING_1)
  {
    sring = PLC_TO_PLUGIN_1_SRAM_RING;
  }
  else if(source == PACKET_IN_RING_2)
  {
    sring = PLC_TO_PLUGIN_2_SRAM_RING;
  }
  else if(source == PACKET_IN_RING_3)
  {
    sring = PLC_TO_PLUGIN_3_SRAM_RING;
  }
  else if(source == PACKET_IN_RING_4)
  {
    sring = PLC_TO_PLUGIN_4_SRAM_RING;
  }
  else
  {
    // Unknown source: nothing to dequeue. The ordering signal was already
    // consumed above, so it must still be forwarded before returning;
    // otherwise the next thread in the rotation would wait forever.
#ifdef DL_ORDERED
    send_packet_signal(&dl_source_packet_sig);
#endif
    return;
  }

  // A first word of 0 is treated as "nothing dequeued": yield and retry.
  sram_ring_get_buffer_6word(sring, ring_in.i);
  while (ring_in.i[0] == 0)
  {
      ctx_swap();
      sram_ring_get_buffer_6word(sring, ring_in.i);
  }

#ifdef DL_ORDERED
  send_packet_signal(&dl_source_packet_sig);
#endif
}

///////////////////////////////////////////////////////////////////////////////
// dl_sink_message:
// Description:
//      Enqueue a control message to XScale.  A local memory variable is used
//      to ensure only one thread is writing to the ring at any one time.
//
//    Parameters:
//      Outputs: n/a
//      In/Outs: n/a
//      Inputs: unsigned int sink - id of the message ring on which the message is to be enqueued.
//              unsigned int *msg - the message words to enqueue.
//      Constants: n/a
//      Labels: n/a
//
//    Side effects: n/a
/*
void dl_sink_message(unsigned int sink, __declspec(gp_reg, aligned(4)) unsigned int *msg)
{
  __declspec(gp_reg) unsigned int sring;

  // only procede once the lock is available
  while(dl_sink_message_lock == LOCKED)
  {
    ctx_swap();
  }
  dl_sink_message_lock = LOCKED;

  if(sink == MESSAGE_OUT_RING_0)
  {
    sring = ONL_PLUGIN_0_TO_XSCALE_CTRL_SRAM_RING;
  }
  else if(sink == MESSAGE_OUT_RING_1)
  {
    sring = ONL_PLUGIN_1_TO_XSCALE_CTRL_SRAM_RING;
  }
  else if(sink == MESSAGE_OUT_RING_2)
  {
    sring = ONL_PLUGIN_2_TO_XSCALE_CTRL_SRAM_RING;
  }
  else if(sink == MESSAGE_OUT_RING_3)
  {
    sring = ONL_PLUGIN_3_TO_XSCALE_CTRL_SRAM_RING;
  }
  else if(sink == MESSAGE_OUT_RING_4)
  {
    sring = ONL_PLUGIN_4_TO_XSCALE_CTRL_SRAM_RING;
  }
  else
  {
    return;
  }


  sram_ring_put_buffer_nwords(sring, &msg[0], (((msg[0]>>16) & 0xFF)+1));

  dl_sink_message_lock = UNLOCKED;
}
*/

///////////////////////////////////////////////////////////////////////////////
// dl_source_message:
// Description:
//      Dequeue a control message from XScale.  A local memory variable is used
//      to ensure only one thread is reading from the ring at any one time.
//
//    Parameters:
//      Outputs: n/a
//      In/Outs: n/a
//      Inputs: unsigned int source - id of the previous block from which a packet is to be received.
//      Constants: n/a
//      Labels: n/a
//
//    Side effects: n/a
void dl_source_message(unsigned int source, __declspec(gp_reg) unsigned int *msg)
{
  __declspec(gp_reg) unsigned int sring;
  __declspec(gp_reg) unsigned int i;
  __declspec(gp_reg) unsigned int n;

  // only procede once the lock is available
  while(dl_source_message_lock == LOCKED)
  {
    ctx_swap();
  }
  dl_source_message_lock = LOCKED;

  if(source == MESSAGE_IN_RING_0)
  {
    sring = ONL_XSCALE_TO_PLUGIN_0_CTRL_SRAM_RING;
  }
  else if(source == MESSAGE_IN_RING_1)
  {
    sring = ONL_XSCALE_TO_PLUGIN_1_CTRL_SRAM_RING;
  }
  else if(source == MESSAGE_IN_RING_2)
  {
    sring = ONL_XSCALE_TO_PLUGIN_2_CTRL_SRAM_RING;
  }
  else if(source == MESSAGE_IN_RING_3)
  {
    sring = ONL_XSCALE_TO_PLUGIN_3_CTRL_SRAM_RING;
  }
  else if(source == MESSAGE_IN_RING_4)
  {
    sring = ONL_XSCALE_TO_PLUGIN_4_CTRL_SRAM_RING;
  }
  else
  {
    return;
  }

  msg[0] = sram_ring_get_buffer_1word(sring);
  while(msg[0] == 0)
  {
    ctx_swap();
    msg[0] = sram_ring_get_buffer_1word(sring);
  }

  n = (msg[0]>>16) & 0xFF;

  if(n >= 1)
  {
    msg[1] = sram_ring_get_buffer_1word(sring);
    while(msg[1] == 0)
    {
      ctx_swap();
      msg[1] = sram_ring_get_buffer_1word(sring);
    }
  }
  if(n >= 2)
  {
    msg[2] = sram_ring_get_buffer_1word(sring);
    while(msg[2] == 0)
    {
      ctx_swap();
      msg[2] = sram_ring_get_buffer_1word(sring);
    }
  }
  if(n >= 3)
  {
    msg[3] = sram_ring_get_buffer_1word(sring);
    while(msg[3] == 0)
    {
      ctx_swap();
      msg[3] = sram_ring_get_buffer_1word(sring);
    }
  }
  if(n >= 4)
  {
    msg[4] = sram_ring_get_buffer_1word(sring);
    while(msg[4] == 0)
    {
      ctx_swap();
      msg[4] = sram_ring_get_buffer_1word(sring);
    }
  }
  if(n >= 5)
  {
    msg[5] = sram_ring_get_buffer_1word(sring);
    while(msg[5] == 0)
    {
      ctx_swap();
      msg[5] = sram_ring_get_buffer_1word(sring);
    }
  }
  if(n >= 6)
  {
    msg[6] = sram_ring_get_buffer_1word(sring);
    while(msg[6] == 0)
    {
      ctx_swap();
      msg[6] = sram_ring_get_buffer_1word(sring);
    }
  }
  if(n >= 7)
  {
    msg[7] = sram_ring_get_buffer_1word(sring);
    while(msg[7] == 0)
    {
      ctx_swap();
      msg[7] = sram_ring_get_buffer_1word(sring);
    }
  }

  dl_source_message_lock = UNLOCKED;
}

//-------------------------------------------------------------------
// sram_ring_get_buffer_1word
//
//    Description:
//       Dequeue 1 word from the given SRAM ring.
//
unsigned int sram_ring_get_buffer_1word(unsigned int ring_number)
{
  SIGNAL ring_signal;
  __declspec(sram_read_reg) unsigned int data[1];

  __declspec(sram) void* ring_addr =
       (__declspec(sram) void *) ((SRAM_CONTROL_RING_CHANNEL<<QDESC_CHANNEL_BITPOS) | (ring_number<<2));

  sram_get_ring(data, ring_addr, sizeof(data) / sizeof(data[0]), ctx_swap, &ring_signal);

  return data[0];
}

//-------------------------------------------------------------------
// sram_ring_get_buffer_nwords
//
//    Description:
//       Dequeue n words from the given SRAM ring.  Currently limited to 8 words max.
//
/*
void sram_ring_get_buffer_nwords(unsigned int ring_number, __declspec(gp_reg, aligned(4)) unsigned int* in, unsigned int n)
{
  int i;
  SIGNAL ring_signal;
  __declspec(sram_read_reg) unsigned int data[8];

  __declspec(sram) void* ring_addr =
       (__declspec(sram) void *) ((SRAM_CONTROL_RING_CHANNEL<<QDESC_CHANNEL_BITPOS) | (ring_number<<2));

  if(n < 1)
  {
    return;
  }

  sram_get_ring(data, ring_addr, n, ctx_swap, &ring_signal);

  if (n > 0) in[0] = data[0];
  if (n > 1) in[1] = data[1];
  if (n > 2) in[2] = data[2];
  if (n > 3) in[3] = data[3];
  if (n > 4) in[4] = data[4];
  if (n > 5) in[5] = data[5];
  if (n > 6) in[6] = data[6];
  if (n > 7) in[7] = data[7];
}
*/

//-------------------------------------------------------------------
// sram_ring_put_buffer_nwords
//
//    Description:
//       Enqueue n words on the given SRAM ring.  Currently limited to 8 words max.
//       For now, also assume that thread should always try to enqueue the packet
//       repeatedly if the ring is full.
//
/*
void sram_ring_put_buffer_nwords(unsigned int ring_number, __declspec(gp_reg, aligned(4)) unsigned int* in, unsigned int n)
{
  int i;
  SIGNAL_PAIR ring_signal;
  __declspec(sram_write_reg) unsigned int data[8];

  // The compiler is associating the size of the put data with the size of the returned status
  __declspec(sram_read_reg) unsigned int status[8];

  __declspec(sram) void* ring_addr =
       (__declspec(sram) void *) ((SRAM_CONTROL_RING_CHANNEL<<QDESC_CHANNEL_BITPOS) | (ring_number<<2));

  data[0] = in[0];
  data[1] = in[1];
  data[2] = in[2];
  data[3] = in[3];
  data[4] = in[4];
  data[5] = in[5];
  data[6] = in[6];
  data[7] = in[7];

  do
  {
    sram_put_ring(&status[0], data, ring_addr, n, sig_done, &ring_signal);
    wait_for_all(&ring_signal);
  }
  while(!(status[0] & 0xf0000000));
}
*/

//-------------------------------------------------------------------
// start_packet_rotation
//
//    Description:
//       Send the first ordering signal for the signal s. Only the context
//       equal to FIRST_PACKET_THREAD does anything: it signals itself when
//       it is the only packet thread (FIRST_PACKET_THREAD ==
//       LAST_PACKET_THREAD), and otherwise signals context
//       FIRST_PACKET_THREAD+1. All other contexts return immediately.
//
static __forceinline void start_packet_rotation(SIGNAL *s)
{
  if (ctx() != FIRST_PACKET_THREAD) {
    return;
  }

  if (FIRST_PACKET_THREAD == LAST_PACKET_THREAD) {
    signal_same_ME(__signal_number(s), FIRST_PACKET_THREAD);
  } else {
    signal_same_ME(__signal_number(s), (FIRST_PACKET_THREAD+1));
  }
  __implicit_write(s);
}

//-------------------------------------------------------------------
// wait_packet_signal
//
//    Description:
//       Wait (via wait_for_all) until the ordering signal s has been
//       received by this thread.
//
static __forceinline void wait_packet_signal(SIGNAL *s)
{
  wait_for_all(s);
}

//-------------------------------------------------------------------
// send_packet_signal
//
//    Description:
//       Pass the ordering signal s on to the next packet thread.
//       With a single packet thread (FIRST_PACKET_THREAD ==
//       LAST_PACKET_THREAD) that thread signals itself. Otherwise
//       LAST_PACKET_THREAD signals FIRST_PACKET_THREAD and every other
//       packet thread signals the next context. Contexts outside
//       FIRST_PACKET_THREAD..LAST_PACKET_THREAD send nothing.
//
static __forceinline void send_packet_signal(SIGNAL *s)
{
  int me_ctx = ctx();

#if ( (FIRST_PACKET_THREAD) == (LAST_PACKET_THREAD) )
  // Single packet thread: it hands the signal back to itself.
  if (me_ctx == FIRST_PACKET_THREAD) {
    signal_same_ME(__signal_number(s), FIRST_PACKET_THREAD);
    __implicit_write(s);
  }
#else
  if (me_ctx == LAST_PACKET_THREAD) {
    // End of the range: wrap around to the first packet thread.
    signal_same_ME(__signal_number(s), FIRST_PACKET_THREAD);
    __implicit_write(s);
  } else if (me_ctx >= FIRST_PACKET_THREAD && me_ctx < LAST_PACKET_THREAD) {
    signal_same_ME_next_ctx(__signal_number(s));
    __implicit_write(s);
  }
#endif
}

#endif /* _PLUGIN_DL_C */
