/*
 * Copyright (c) 2007 John DeHart and Washington University in St. Louis.
 * All rights reserved
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *    1. Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *    2. Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *    3. The name of the author or Washington University may not be used 
 *       to endorse or promote products derived from this source code 
 *       without specific prior written permission.
 *    4. Conditions of any other entities that contributed to this are also
 *       met. If a copyright notice is present from another entity, it must
 *       be maintained in redistributions of the source code.
 *
 * THIS INTELLECTUAL PROPERTY (WHICH MAY INCLUDE BUT IS NOT LIMITED TO SOFTWARE,
 * FIRMWARE, VHDL, etc) IS PROVIDED BY THE AUTHOR AND WASHINGTON UNIVERSITY 
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR WASHINGTON UNIVERSITY 
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
 * ARISING IN ANY WAY OUT OF THE USE OF THIS INTELLECTUAL PROPERTY, EVEN IF 
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * */
/*
 * File: plugin_dl.h  
 * Author: Charlie Wiseman
 * Email: cgw1@arl.wustl.edu
 * Organization: Applied Research Laboratory
 * 
 * Derived from: dl_source.h (for ONL router)
 *
 * Date Created: 8/20/2007 
 * 
 * Description: Dispatch loop functions tailored for plugins.
 *
 * Modification History:
 *
 */

#ifndef _PLUGIN_DL_H
#define _PLUGIN_DL_H

/*
 * rings
 *
 * Numeric identifiers for the dispatch-loop endpoints.  Within this header
 * only the MESSAGE_OUT_RING_* values are consumed: dl_sink_message()
 * compares its `sink` argument against them to pick the SRAM ring to use.
 * NOTE(review): the remaining values are presumably the `sink`/`source`
 * arguments accepted by dl_sink_packet(), dl_source_packet() and
 * dl_source_message() -- confirm against their definitions.
 */
#define QM                  0
#define MUX                 1
#define XSCALE              2
#define XSCALE_LD           3
#define XSCALE_EXC          4
#define XSCALE_ERR          5
#define PACKET_IN_RING_0    6
#define PACKET_IN_RING_1    7
#define PACKET_IN_RING_2    8
#define PACKET_IN_RING_3    9
#define PACKET_IN_RING_4   10
#define MESSAGE_OUT_RING_0 11
#define MESSAGE_OUT_RING_1 12
#define MESSAGE_OUT_RING_2 13
#define MESSAGE_OUT_RING_3 14
#define MESSAGE_OUT_RING_4 15
#define MESSAGE_IN_RING_0  16
#define MESSAGE_IN_RING_1  17
#define MESSAGE_IN_RING_2  18
#define MESSAGE_IN_RING_3  19
#define MESSAGE_IN_RING_4  20
#define DROP               21
#define DO_NOTHING         22
#define TX                 23

/*
 * Control message kinds.  These constants are not referenced anywhere else
 * in this header.
 * NOTE(review): presumably the values carried in the `type` field of
 * onl_api_ctrl_msg_hdr -- confirm against the code that builds messages.
 */
#define CM_CONTROLMSG        0 
#define CM_CONTROLMSGRSP     1
#define CM_DEBUGMSG          2

/*
 * Header word of a control message.  The four bit-fields add up to exactly
 * 32 bits and overlay `value`, so the header can be accessed either field by
 * field or as one unsigned int.
 *
 * NOTE(review): dl_sink_message() reads the word count as
 * ((msg[0] >> 16) & 0xFF), which matches `num_words` only if the compiler
 * allocates bit-fields starting from the most significant bit -- confirm for
 * the target toolchain.
 */
typedef __declspec(packed) union onl_api_u_ctrl_msg_hdr  
{
  struct
  {
    unsigned int response_requested: 1;   // 1-bit flag
    unsigned int type              : 7;   // message type (7 bits)
    unsigned int num_words         : 8;   // word count (8 bits); dl_sink_message() sends this many words plus one
    unsigned int mid               : 16;  // 16-bit message id

  };
  unsigned int value;                     // the whole header as a single word
} onl_api_ctrl_msg_hdr;

// Signal sent by the rx task once it has finished ring initialization.
// Plugin code must not dequeue/enqueue packets from/onto the scratch or
// SRAM rings before this signal has been received.
__declspec(visible) SIGNAL rx_ring_ready_sig;

/*
 * The next two are needed for using SRAM rings.
 * sram_ring_put_buffer_nwords() builds a ring address by shifting
 * SRAM_CONTROL_RING_CHANNEL left by QDESC_CHANNEL_BITPOS and OR-ing in
 * (ring_number << 2).
 */
#define SRAM_CONTROL_RING_CHANNEL 3
#define QDESC_CHANNEL_BITPOS 30

// Signals that exist only when the build defines DL_ORDERED.  They are not
// referenced anywhere else in this header.
// NOTE(review): presumably used by dl_sink_packet()/dl_source_packet() to
// keep threads in order -- confirm against their definitions.
#ifdef DL_ORDERED
  SIGNAL dl_sink_packet_sig;
  SIGNAL dl_source_packet_sig;
#endif

// States of the message locks below.
#define UNLOCKED 0
#define LOCKED   1
// Lock taken by dl_sink_message() around its ring put: the caller swaps
// context (ctx_swap) while the lock reads LOCKED, then sets it to LOCKED, and
// sets it back to UNLOCKED once the message has been enqueued.
// NOTE(review): these variables are defined (not just declared) in a header;
// that is only safe if the header is included once per image -- confirm.
__declspec(shared local_mem) unsigned int dl_sink_message_lock;
// Not referenced in this header.
// NOTE(review): presumably the matching lock for dl_source_message() -- confirm.
__declspec(shared local_mem) unsigned int dl_source_message_lock;

//-------------------------------------------------------------------
// sram_ring_put_buffer_nwords
//
//    Description:
//       Enqueue n words on the given SRAM ring.  Currently limited to 8 words
//       max.  The put is reissued for as long as the top four bits of the
//       returned status word are all zero, i.e. the thread keeps trying to
//       enqueue (e.g. while the ring is full) and only returns once the put
//       has been accepted.
//
//    Parameters:
//       ring_number - ring to put onto; (ring_number << 2) is OR-ed with the
//                     SRAM control ring channel bits to form the ring address.
//       in          - words to enqueue.  in[0]..in[7] are ALWAYS read, whatever
//                     the value of n, so the caller must supply 8 readable words.
//       n           - number of words handed to sram_put_ring as the count.
//                     NOTE(review): values above 8 are not checked here.
//
__forceinline void sram_ring_put_buffer_nwords(unsigned int ring_number, __declspec(gp_reg, aligned(4)) unsigned int* in, unsigned int n)
{
  SIGNAL_PAIR ring_signal;
  __declspec(sram_write_reg) unsigned int data[8];

  // The compiler is associating the size of the put data with the size of the returned status
  __declspec(sram_read_reg) unsigned int status[8];

  // Shift the channel number as an unsigned value: 3 << 30 does not fit in a
  // signed int, so the original signed shift overflowed.
  __declspec(sram) void* ring_addr =
       (__declspec(sram) void *) ((((unsigned int)SRAM_CONTROL_RING_CHANNEL)<<QDESC_CHANNEL_BITPOS) | (ring_number<<2));

  // All eight write registers are loaded with explicit constant indices
  // (the original author left a loop over i < n disabled and used this
  // unrolled copy instead).
  data[0] = in[0];
  data[1] = in[1];
  data[2] = in[2];
  data[3] = in[3];
  data[4] = in[4];
  data[5] = in[5];
  data[6] = in[6];
  data[7] = in[7];

  // Issue the put and wait for it to complete; repeat until the status word
  // reports something in its top four bits.
  do
  {
    sram_put_ring(&status[0], data, ring_addr, n, sig_done, &ring_signal);
    wait_for_all(&ring_signal);
  }
  while(!(status[0] & 0xf0000000));
}

///////////////////////////////////////////////////////////////////////////////
// dl_sink_message:
// Description:
//      Enqueue a control message to XScale.  A local memory variable
//      (dl_sink_message_lock) is used to ensure only one thread is writing
//      to the ring at any one time.
//
//    Parameters:
//      Outputs: n/a
//      In/Outs: n/a
//      Inputs: unsigned int sink - one of MESSAGE_OUT_RING_0..4, selecting the
//              plugin-to-XScale control SRAM ring to enqueue on.  Any other
//              value makes the call a no-op.
//              unsigned int *msg - the message; msg[0] is the header word.
//              ((msg[0] >> 16) & 0xFF) + 1 words are enqueued (the header's
//              word count plus one).  The underlying put always reads
//              msg[0]..msg[7] and is limited to 8 words.
//      Constants: n/a
//      Labels: n/a
//
//    Side effects: may swap context while waiting for the lock and while the
//      ring put is retried.
__forceinline void dl_sink_message(unsigned int sink, __declspec(gp_reg, aligned(4)) unsigned int *msg)
{
  __declspec(gp_reg) unsigned int sring;

  // Resolve the destination ring BEFORE taking the lock.  Previously an
  // unrecognised sink returned with the lock still held, which blocked every
  // later caller forever.
  if(sink == MESSAGE_OUT_RING_0)
  {
    sring = ONL_PLUGIN_0_TO_XSCALE_CTRL_SRAM_RING;
  }
  else if(sink == MESSAGE_OUT_RING_1)
  {
    sring = ONL_PLUGIN_1_TO_XSCALE_CTRL_SRAM_RING;
  }
  else if(sink == MESSAGE_OUT_RING_2)
  {
    sring = ONL_PLUGIN_2_TO_XSCALE_CTRL_SRAM_RING;
  }
  else if(sink == MESSAGE_OUT_RING_3)
  {
    sring = ONL_PLUGIN_3_TO_XSCALE_CTRL_SRAM_RING;
  }
  else if(sink == MESSAGE_OUT_RING_4)
  {
    sring = ONL_PLUGIN_4_TO_XSCALE_CTRL_SRAM_RING;
  }
  else
  {
    // not a message-out ring: nothing to send, and the lock was never taken
    return;
  }

  // only proceed once the lock is available
  while(dl_sink_message_lock == LOCKED)
  {
    ctx_swap();
  }
  dl_sink_message_lock = LOCKED;

  // header word count (bits 23..16 of msg[0]) plus one
  sram_ring_put_buffer_nwords(sring, &msg[0], (((msg[0]>>16) & 0xFF)+1));

  dl_sink_message_lock = UNLOCKED;
}

// Writes the lowercase hexadecimal text of val into valstr, without leading
// zeros, followed by a terminating '\0'.  A val of 0 yields "0".  At most
// eight digits plus the terminator are written, hence valstr[9].
// (val is treated as a 32-bit quantity: eight 4-bit digits.)
__forceinline void onl_api_int2str(__declspec(gp_reg) unsigned int val, __declspec(local_mem) char valstr[9])
{
  __declspec(gp_reg) unsigned int nibble;
  __declspec(gp_reg) unsigned int shift;
  __declspec(gp_reg) unsigned int pos = 0;
  __declspec(gp_reg) unsigned int started = 0;

  // The seven upper digits, most significant first; zeros are skipped until
  // the first non-zero digit has been emitted.
  for(shift = 28; shift != 0; shift -= 4)
  {
    nibble = (val >> shift) & 0xf;
    if(started == 0 && nibble == 0)
    {
      continue;
    }
    started = 1;
    if(nibble > 9)
    {
      valstr[pos++] = nibble - 10 + 'a';
    }
    else
    {
      valstr[pos++] = nibble + '0';
    }
  }

  // The lowest digit is always emitted, so a zero value still prints "0".
  nibble = val & 0xf;
  if(nibble > 9)
  {
    valstr[pos++] = nibble - 10 + 'a';
  }
  else
  {
    valstr[pos++] = nibble + '0';
  }

  valstr[pos] = '\0';
}

// Packs the NUL-terminated string msg into seven 32-bit words.
//
// The string (at most 27 characters) is copied into a 28-byte buffer, followed
// by its '\0'; every byte after the terminator is set to 0xff.  The buffer is
// then packed four bytes per word, first byte in the most significant
// position, into intarr[0]..intarr[6].
//
// Returns 0 on success, or -1 (leaving intarr untouched) when msg is longer
// than 27 characters.
__forceinline int onl_api_str2intarr(__declspec(local_mem) char *msg, __declspec(gp_reg) unsigned int intarr[7])
{
  __declspec(gp_reg) unsigned int msglen;
  __declspec(gp_reg) unsigned int i;
  __declspec(local_mem) char padmsg[28];

  msglen = strlen_lmem(msg);
  if(msglen > 27)
  {
    return -1;
  }

  // text, then terminator, then 0xff filler up to 28 bytes
  for(i=0; i<msglen; ++i)
  {
    padmsg[i] = msg[i];
  }
  padmsg[msglen] = '\0';
  for(i=msglen+1; i<28; ++i)
  {
    padmsg[i] = 0xff;
  }

  // intarr is indexed with constants only; each byte is masked with 0xff so a
  // signed char cannot sign-extend into the neighbouring bytes.
  intarr[0] = ((((unsigned int)(padmsg[0])) & 0xff) << 24)
            | ((((unsigned int)(padmsg[1])) & 0xff) << 16)
            | ((((unsigned int)(padmsg[2])) & 0xff) << 8)
            | (((unsigned int)(padmsg[3])) & 0xff);
  intarr[1] = ((((unsigned int)(padmsg[4])) & 0xff) << 24)
            | ((((unsigned int)(padmsg[5])) & 0xff) << 16)
            | ((((unsigned int)(padmsg[6])) & 0xff) << 8)
            | (((unsigned int)(padmsg[7])) & 0xff);
  intarr[2] = ((((unsigned int)(padmsg[8])) & 0xff) << 24)
            | ((((unsigned int)(padmsg[9])) & 0xff) << 16)
            | ((((unsigned int)(padmsg[10])) & 0xff) << 8)
            | (((unsigned int)(padmsg[11])) & 0xff);
  intarr[3] = ((((unsigned int)(padmsg[12])) & 0xff) << 24)
            | ((((unsigned int)(padmsg[13])) & 0xff) << 16)
            | ((((unsigned int)(padmsg[14])) & 0xff) << 8)
            | (((unsigned int)(padmsg[15])) & 0xff);
  intarr[4] = ((((unsigned int)(padmsg[16])) & 0xff) << 24)
            | ((((unsigned int)(padmsg[17])) & 0xff) << 16)
            | ((((unsigned int)(padmsg[18])) & 0xff) << 8)
            | (((unsigned int)(padmsg[19])) & 0xff);
  intarr[5] = ((((unsigned int)(padmsg[20])) & 0xff) << 24)
            | ((((unsigned int)(padmsg[21])) & 0xff) << 16)
            | ((((unsigned int)(padmsg[22])) & 0xff) << 8)
            | (((unsigned int)(padmsg[23])) & 0xff);
  intarr[6] = ((((unsigned int)(padmsg[24])) & 0xff) << 24)
            | ((((unsigned int)(padmsg[25])) & 0xff) << 16)
            | ((((unsigned int)(padmsg[26])) & 0xff) << 8)
            | (((unsigned int)(padmsg[27])) & 0xff);

  return 0;
}

// Unpacks seven 32-bit words into 28 bytes of msg: each word yields four
// bytes, most significant byte first, so intarr[k] fills msg[4k]..msg[4k+3].
// The bytes are copied as they are; no terminator is appended here.
__forceinline void onl_api_intarr2str(__declspec(gp_reg) unsigned int intarr[7], __declspec(local_mem) char msg[28])
{
  __declspec(gp_reg) unsigned int word;

  // intarr is indexed with constants only, one word at a time.
  word = intarr[0];
  msg[0] = (word >> 24) & 0xff;
  msg[1] = (word >> 16) & 0xff;
  msg[2] = (word >> 8) & 0xff;
  msg[3] = word & 0xff;

  word = intarr[1];
  msg[4] = (word >> 24) & 0xff;
  msg[5] = (word >> 16) & 0xff;
  msg[6] = (word >> 8) & 0xff;
  msg[7] = word & 0xff;

  word = intarr[2];
  msg[8] = (word >> 24) & 0xff;
  msg[9] = (word >> 16) & 0xff;
  msg[10] = (word >> 8) & 0xff;
  msg[11] = word & 0xff;

  word = intarr[3];
  msg[12] = (word >> 24) & 0xff;
  msg[13] = (word >> 16) & 0xff;
  msg[14] = (word >> 8) & 0xff;
  msg[15] = word & 0xff;

  word = intarr[4];
  msg[16] = (word >> 24) & 0xff;
  msg[17] = (word >> 16) & 0xff;
  msg[18] = (word >> 8) & 0xff;
  msg[19] = word & 0xff;

  word = intarr[5];
  msg[20] = (word >> 24) & 0xff;
  msg[21] = (word >> 16) & 0xff;
  msg[22] = (word >> 8) & 0xff;
  msg[23] = word & 0xff;

  word = intarr[6];
  msg[24] = (word >> 24) & 0xff;
  msg[25] = (word >> 16) & 0xff;
  msg[26] = (word >> 8) & 0xff;
  msg[27] = word & 0xff;
}

//-------------------------------------------------------------------
// onl_api_debug_message
//
//    Description:
//      Debugging - add a msg to a log file.  Declared here, defined elsewhere.
//
//    Parameters:
//      sink - NOTE(review): presumably one of the MESSAGE_OUT_RING_* ids, as
//             for dl_sink_message() -- confirm against the definition.
//      msg  - text of the message, held in local memory.
//
//    Returns: an int status; its meaning is not visible from this header.
//-------------------------------------------------------------------
int onl_api_debug_message(unsigned int sink, __declspec(local_mem) char *msg);

//-------------------------------------------------------------------
// dl_sink_init
//
//    Description:
//      Fix the value of the incoming ring-ready signal so that RX can indicate
//      when all rings have been created, and start the signalling rotation
//      for dl_sink.  Declared here, defined elsewhere.
extern void dl_sink_init();

//-------------------------------------------------------------------
// dl_source_init
//
//    Description:
//      Wait for RX to signal that the rings have been created, then enter the
//      signalling rotation for dl_source.  Declared here, defined elsewhere.
extern void dl_source_init();

///////////////////////////////////////////////////////////////////////////////
// dl_sink_packet:
//    Description:
//      Enqueue a packet from the current processing block to the next
//      processing block, depending on which microengine is running this.
//      Declared here, defined elsewhere.
//    Parameters:
//      sink - destination identifier.
//             NOTE(review): presumably one of the ring ids defined at the top
//             of this header -- confirm against the definition.
extern void dl_sink_packet(unsigned int sink);

///////////////////////////////////////////////////////////////////////////////
// dl_source_packet:
// Description:
//      Dequeue a packet from the previous processing block, depending on which
//      microengine is running this.  Declared here, defined elsewhere.
//    Parameters:
//      source - origin identifier.
//               NOTE(review): presumably one of the ring ids defined at the
//               top of this header -- confirm against the definition.
extern void dl_source_packet(unsigned int source);

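///////////////////////////////////////////////////////////////////////////////
// dl_sink_message:
//    Description:
//      Enqueue a control message back to XScale.
//      No prototype is needed here: dl_sink_message is defined earlier in this
//      header as a __forceinline function.  The former extern declaration is
//      kept below, commented out, for reference only.
//extern void dl_sink_message(unsigned int sink, __declspec(gp_reg, aligned(4)) unsigned int *msg);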

///////////////////////////////////////////////////////////////////////////////
// dl_source_message:
// Description:
//      Dequeue a control message from the XScale.  Declared here, defined
//      elsewhere.
//    Parameters:
//      source - NOTE(review): presumably one of the MESSAGE_IN_RING_* ids --
//               confirm against the definition.
//      msg    - buffer that receives the message words; the number of words
//               written is not visible from this header.
extern void dl_source_message(unsigned int source, __declspec(gp_reg) unsigned int *msg);

#endif	// _PLUGIN_DL_H
