// Made small change to delay.c.8:  kenw_ultoa now left-justifies the ASCII
// decimal number instead of right-justifying it

/*
 * Copyright (c) 2008 Washington University in St. Louis.
 * All rights reserved
 *
 *  Redistribution and use in source and binary forms, with or without
 *  modification, are permitted provided that the following conditions
 *  are met:
 *    1. Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *    2. Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *    3. The name of the author or Washington University may not be used 
 *       to endorse or promote products derived from this source code 
 *       without specific prior written permission.
 *    4. Conditions of any other entities that contributed to this are also
 *       met. If a copyright notice is present from another entity, it must
 *       be maintained in redistributions of the source code.
 *
 * THIS INTELLECTUAL PROPERTY (WHICH MAY INCLUDE BUT IS NOT LIMITED TO SOFTWARE,
 * FIRMWARE, VHDL, etc) IS PROVIDED BY THE AUTHOR AND WASHINGTON UNIVERSITY 
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR WASHINGTON UNIVERSITY 
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
 * ARISING IN ANY WAY OUT OF THE USE OF THIS INTELLECTUAL PROPERTY, EVEN IF 
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * */
/*
 * File: delay.c  
 * Author: Ken Wong
 * Email: kenw@arl.wustl.edu
 * Organization: Applied Research Laboratory
 * 
 * Derived from: pluginFramework/nstat.c
 *
 * Date Created: 06/09/2008 
 * 
 * Description: delay pkt n msec
 *
 * Modification History:
 *
 */

#ifndef _PLUGIN_DELAY_C
#define _PLUGIN_DELAY_C


//
//
// NOTE: See pluginFramework/null.c (null plugin) 
//       for description pre-processors values
//
//

#include <memory.h>
#include "plugin_api.h"
#include "plugin_dl.h"

#include "scratch_rings_WU.h"

//#ifdef DL_ORDERED
//    SIGNAL dl_sink_packet_sig;
//    SIGNAL dl_source_packet_sig;
//#endif

//-----------------------------------------------------------
// typedefs, unions, enums
//-----------------------------------------------------------
// 64-bit timestamp that can also be accessed as two separate words.
// 'tm' is the whole value; 'tm2.hi' / 'tm2.lo' overlay it so that the values
// read from the timestamp_high / timestamp_low CSRs can be stored directly.
// NOTE(review): the overlay assumes unsigned long is half the width of
//	long long and that 'hi' (the first member) is the most-significant
//	half -- confirm for the target compiler.
union tm_tag {
    long long	tm;		// full timestamp
    struct {
	unsigned long	hi;	// filled from local_csr_timestamp_high
	unsigned long	lo;	// filled from local_csr_timestamp_low
    }	tm2;
};

// One queued packet.  Items are chained through 'next' both while they sit
// in the delay queue and while they sit on the free list.
struct delay_item_tag {
    union tm_tag time;		// departure time; callback() compares it
				//	against the current timestamp
    unsigned int buf_handle;	// copied to buf_handle_lo24 on departure
    unsigned int out_port;	// copied to the QM ring entry on departure
    unsigned int qid;		// copied to the QM ring entry on departure
    unsigned int l3_pkt_len;	// copied to the QM ring entry on departure
    struct delay_item_tag *next;	// next item in queue / free list; 0 at end
};

// FIFO of delayed packets plus the free list the items are allocated from.
// Items are appended at 'tl' (delayq_enq) and removed at 'hd' (delayq_pop).
struct delayq_tag {
    unsigned long		ninq;	// # in delay queue
    struct delay_item_tag	*hd;	// head ptr (0 when empty after init)
    struct delay_item_tag	*tl;	// tail ptr (0 when empty)
    struct delay_item_tag	*free_hd;	// free list (0 when exhausted)
};


//-----------------------------------------------------------
// Global variables/Registers
//-----------------------------------------------------------
// thread-specific globals 
__declspec(gp_reg) int dlNextBlock;  // where to send packets to next
__declspec(gp_reg) int dlFromBlock;  // where to get packets from
__declspec(gp_reg) int msgNextBlock; // where to send control messages to next
__declspec(gp_reg) int msgFromBlock; // where to get control messages from

// see ring_formats.h for struct definitions
volatile __declspec(gp_reg) plc_plugin_data ring_in;  // ring data from PLC
volatile __declspec(gp_reg) plugin_out_data ring_out; // ring data to next block

// Argument passed to sleep() by callback() when nothing is ready to leave.
// NOTE(review): the original comment read "cycles between callbacks
//	(10 usec)"; the unit of sleep()'s argument is not visible here --
//	confirm that 16 really corresponds to 10 usec.
const unsigned int SLEEP_CYCLES = 16;
//XXX __declspec(local_mem) unsigned int timeout = 1400;  // ticks between callbacks
						//	10 usec
__declspec(gp_reg) unsigned int pluginId;	// plugin id; plugin_init() assigns 0...4
// Initialised to 50 (originally "delay (msec)"), but handle_pkt_user()
// overwrites it for every packet with a value it then treats as cycles.
__declspec(shared gp_reg) unsigned int delay = 50;
//XXX__declspec(shared gp_reg) unsigned int ninq = 0;  // # queued pkts
__declspec(shared gp_reg) unsigned int npkts = 0; // # pkts seen by plugin
__declspec(shared gp_reg) unsigned int ndrops = 0; // # pkts dropped
__declspec(shared) union tm_tag z;	// departure time of the packet being enqueued

//------
// States of delayq_lock, the flag delayq_enq()/delayq_pop() spin on.
#define UNLOCKED 0
#define LOCKED   1
__declspec(shared gp_reg) unsigned int delayq_lock;

// Number of delay_item_tag entries delayq_init() links into the free list.
#define	MAX_QUEUE_SZ	35000
__declspec(shared, sram) struct delayq_tag delayq;	// the one delay queue

// NOTE(review): the four x* variables below are not referenced anywhere in
//	the code visible in this file.
__declspec(shared sram) unsigned int xbuf_handle;
__declspec(shared sram) unsigned int xout_port;
__declspec(shared sram) unsigned int xqid;
__declspec(shared sram) unsigned int xl3_pkt_len;
//------


#ifdef DEBUG
// Only referenced from commented-out debug code in this file.
__declspec(shared gp_reg) unsigned int pkt_count;
#endif

//-----------------------------------------------------------
// Function prototypes
//-----------------------------------------------------------
// user hooks called from handle_pkt(), handle_msg-side code and plugin_init()
void handle_pkt_user();
void handle_msg_user();
void plugin_init_user();
// builds the free list for the delay queue; returns 0 if OK, -1 on a bad plugin id
int delayq_init( __declspec(shared, sram) struct delayq_tag *qptr );

// ordering-signal helpers used by dl_source_nopacket()/dl_sink_nopacket()
static void wait_packet_signal(SIGNAL *);
static void send_packet_signal(SIGNAL *);

//-----------------------------------------------------------
// Some Useful Functions
//-----------------------------------------------------------
//
// kenw_dbg_message
// diff_msec
// msec2cycles
// wait_packet_signal
// send_packet_signal
// dl_source_nopacket
// dl_sink_nopacket
// nxt_token
// kenw_ultoa_sram
// kenw_atou_sram
// kenw_ul2hex
//

// Copy a debug message from SRAM into local memory and emit it.
//   dbgmsg : local-memory scratch buffer that receives the copy and is then
//	      handed to onl_api_debug_message()
//   mymsg  : message text in SRAM
//   nbytes : number of bytes to copy; the caller must make sure dbgmsg is at
//	      least this large (no check is done here)
// The message is sent to the block identified by the global msgNextBlock.
//
static __forceinline void
kenw_dbg_message( __declspec(local_mem) char *dbgmsg,
			__declspec(sram) char *mymsg, unsigned int nbytes ) {
    memcpy_lmem_sram( dbgmsg, mymsg, nbytes );
    onl_api_debug_message( msgNextBlock, dbgmsg );
}

//
// Convert the distance between two timestamps (t2 - t1) to msec.
// The delta is multiplied by 16 (shift left by 4), then divided by 100000
// and by 14; each division truncates.
//
static __forceinline unsigned long
diff_msec(	__declspec(local_mem) long long t2,
		__declspec(local_mem) long long t1 ) {
    __declspec(gp_reg)	  unsigned long msec;
    long long scaled;

    scaled = (t2 - t1) << 4;		// 16*delta
    scaled = scaled / 100000;		//	/10^5
    msec = scaled / 14;			//	/14
    return msec;
}

// Convert msec to a count of ME cycles, assuming a 1.4 GHz ME
// (1,400,000 cycles per msec).
//
// CAVEAT:  the product is computed in unsigned long and wraps silently once
//	it no longer fits.
// NOTE(review): the original caveat read "msec <= 49000 msec".  If unsigned
//	long is 32 bits on the target, 1400000*msec already wraps above
//	3067 msec -- confirm the intended limit.
//
static __forceinline unsigned long
msec2cycles ( __declspec(gp_reg) unsigned long msec ) {
    __declspec(gp_reg)	  unsigned long cycles;

    cycles = msec;
    cycles *= 14;
    cycles *= 100000;
    return cycles;
}

// Wait for the packet-ordering signal 's'.
// Thin wrapper around wait_for_all(); it returns once that call returns.
static __forceinline void
wait_packet_signal(SIGNAL *s) {
    wait_for_all(s);
}

// Pass the packet-ordering signal 's' on to the next packet thread.
// With a single packet thread (FIRST_PACKET_THREAD == LAST_PACKET_THREAD)
// the thread signals itself.  Otherwise every packet thread except the last
// signals the next context, and the last one signals FIRST_PACKET_THREAD, so
// the signal travels round the packet threads in a ring.
// Contexts outside the packet-thread range send nothing.
static __forceinline void
send_packet_signal(SIGNAL *s) {
    int c;
  
    c = ctx();			// context number of the calling thread
#if ( (FIRST_PACKET_THREAD) == (LAST_PACKET_THREAD) )
    if(c == FIRST_PACKET_THREAD)
    {
	signal_same_ME(__signal_number(s), FIRST_PACKET_THREAD);
	__implicit_write(s);	// presumably tells the compiler 's' is set
				//	outside its view -- TODO confirm
    }
#else
    if(c >= FIRST_PACKET_THREAD && c < LAST_PACKET_THREAD) 
    {
	signal_same_ME_next_ctx(__signal_number(s));
	__implicit_write(s);
    }
    else if(c == LAST_PACKET_THREAD)
    {
	// last packet thread: wrap round to the first one
	signal_same_ME(__signal_number(s), FIRST_PACKET_THREAD);
	__implicit_write(s);
    }
#endif
}

// Counterpart of dl_source_packet() for a thread that does not dequeue a
//	packet from the previous processing block.  When DL_ORDERED is
//	defined it still waits for the source ordering signal and passes it
//	on; otherwise it does nothing.
//
void dl_source_nopacket( void )
{
#ifdef DL_ORDERED
    // take our turn, then hand the turn straight to the next thread
    wait_packet_signal(&dl_source_packet_sig);
    send_packet_signal(&dl_source_packet_sig);
#endif
}

// Counterpart of dl_sink_packet() for a thread that does not enqueue a
//	packet to the next processing block.  When DL_ORDERED is defined it
//	still waits for the sink ordering signal and passes it on; otherwise
//	it does nothing.
//
void dl_sink_nopacket( void )
{
#ifdef DL_ORDERED
    // take our turn, then hand the turn straight to the next thread
    wait_packet_signal(&dl_sink_packet_sig);
    send_packet_signal(&dl_sink_packet_sig);
#endif
}

// Return the address of the word that follows the current one, where a word
// 	is any run of characters terminated by a space.  Only the first n
// 	characters starting at p are examined (a NUL does not stop the scan).
// 	Returns 0 if no following word starts within those n characters.
// Example: for "xxx  yyy" with p at the first 'x', nxt_token( p, 7 ) returns
// 	the address of the first 'y' and nxt_token( p, 4 ) returns 0.
//
static __forceinline __declspec(sram) char *
nxt_token( __declspec(sram) char *p, __declspec(gp_reg) int n ) {
    __declspec(sram) char *limit;

    limit = p + n;

    // step over the current word up to its terminating space
    while( (p < limit) && (*p != ' ') )	p++;
    if( p >= limit )	return 0;

    // step over that space and any spaces that follow it
    p++;
    while( (p < limit) && (*p == ' ') )	p++;

    return ( p < limit ) ? p : 0;
}

// Convert the unsigned long 'x' to an ASCII decimal string in the buffer of
// 	'n' bytes that starts at 'p'.
// NOTE:  The result is LEFT justified and NUL terminated: the digits are
// 		first generated right justified (NUL in the rightmost byte,
// 		blanks on the left) and then moved to the front of the buffer.
// Returns the number of digits produced, or -1 if the value needs more than
// 	n-1 digits (this includes n < 2).  On -1 the buffer does not hold a
// 	valid conversion.
// Never writes outside p[0..n-1].
//
static __forceinline __declspec(gp_reg) int
kenw_ultoa_sram(	__declspec(sram) unsigned long x,
		__declspec(sram) char *p,
		__declspec(gp_reg) int n ) {
    __declspec(gp_reg) int k;
    __declspec(gp_reg) int K;
    __declspec(gp_reg) int r;
    __declspec(gp_reg) int ndigits;
    __declspec(sram) char *pend;

    if( n < 1 )		return -1;	// not even room for the NUL

    ndigits = 0;
    pend = p + n - 1;
    *pend = '\0';
    --pend;

    // fill from the right: digits first, then blanks once x is used up
    while( pend >= p ) {
    	if( x > 0 ) {
    	    r = x%10;
	    *pend = '0' + r;
	    ++ndigits;
	} else if( ndigits == 0 ) {
	    *pend = '0';		// x was 0: emit a single '0'
	    ndigits = 1;
	} else {
	    *pend = ' ';
	}
	x = x/10;
    	--pend;
    }
    // x > 0: digits left over; ndigits == 0: n == 1, no room for any digit
    if( (x > 0) || (ndigits == 0) )	return -1;

    // move the digits and the NUL that follows them to the front.  The NUL
    // at p[n-1] is part of the K bytes copied, so no extra terminator is
    // written (writing one at p[ndigits+1] would overrun the buffer when
    // ndigits == n-1).
    K = ndigits + 1;
    pend = p + n - K;
    for( k=0; k<K; k++) {
    	*p = *pend;
	p++;
	pend++;
    }
    return ndigits;
}

// Convert the NUL-terminated ASCII decimal string at 'p' to unsigned long.
// Blanks are skipped wherever they occur.  Any other non-digit character
// makes the function return 0.  Overflow is not detected.
//
static __forceinline unsigned long
kenw_atou_sram( __declspec(sram) char *p ) {
    __declspec(gp_reg) unsigned long total = 0;
    __declspec(gp_reg) int	     digit;

    for( ; *p != '\0'; p++ ) {
	if( *p == ' ' )	continue;

	digit = *p - '0';
	if( (digit < 0) || (digit > 9) )	return 0;	// not a digit
	total = total*10 + digit;
    }
    return total;
}

// map a value 0..15 to its lower-case hex digit
//
static __forceinline char
kenw_hex_nibble( unsigned int v ) {
    if( v < 10 )	return '0' + v;
    return 'a' + v - 10;
}

// Convert the low 32 bits of 'x' to 8 lower-case hex digits, most
// significant digit first.
// hexstr receives exactly 8 characters; NO terminating NUL is written, so
// the caller must add one if it needs a string.
// Every nibble (including the top one) is masked, so bits above bit 31 are
// ignored where unsigned long is wider than 32 bits.
//
static __forceinline void
kenw_ul2hex( unsigned long x, char hexstr[8] ) {
    hexstr[0] = kenw_hex_nibble( (x>>28) & 0xf );
    hexstr[1] = kenw_hex_nibble( (x>>24) & 0xf );
    hexstr[2] = kenw_hex_nibble( (x>>20) & 0xf );
    hexstr[3] = kenw_hex_nibble( (x>>16) & 0xf );
    hexstr[4] = kenw_hex_nibble( (x>>12) & 0xf );
    hexstr[5] = kenw_hex_nibble( (x>>8) & 0xf );
    hexstr[6] = kenw_hex_nibble( (x>>4) & 0xf );
    hexstr[7] = kenw_hex_nibble( x & 0xf );
}

// forward references to functions defined near the end of this file
void dl_sink_delay( void );		// send head of delay queue to the QM ring
// take an item from / return an item to the queue's free list
struct delay_item_tag * delayq_alloc( struct delayq_tag *delayq );
void delayq_free( struct delayq_tag *delayq, struct delay_item_tag *item );
// append a packet to the delay queue; returns new queue length or -1
int delayq_enq(
	__declspec(shared, sram)
	  struct delayq_tag *qptr,	long long time,
	unsigned int buf_handle, 	unsigned int out_port, 
	unsigned int qid,		unsigned int l3_pkt_len );
// remove the head of the delay queue; returns 0 if OK, -1 if empty
int delayq_pop( __declspec(shared, sram) struct delayq_tag *qptr );


//-----------------------------------------------------------
// Begin Normal Functions
//-----------------------------------------------------------
void handle_pkt_user()  {
    __declspec(gp_reg) buf_handle_t buf_handle;	// see dl_buf.h, ixp_lib.h   
    __declspec(gp_reg) onl_api_buf_desc bufDescriptor;// see dl_buf.h, ixp_lib.h
    unsigned int bufDescPtr;
    __declspec(gp_reg) onl_api_ip_hdr ipv4_hdr; // defined in ixp_lib.h
    unsigned int ipv4HdrPtr;
    unsigned int dramBufferPtr;
    union tm_tag x;
    struct delay_item_tag *delay_item_ptr;
    unsigned long ninq;
    __declspec(gp_reg) unsigned int out_port;
    __declspec(gp_reg) unsigned int qid;
    
    SIGNAL sram_sig ;
    __declspec(sram_read_reg) unsigned int link_cap[1];
    __declspec(sram) unsigned int portRatePtr;

    __declspec(local_mem) char delaymsg[18] = "delayed! ";
    __declspec(local_mem) char ninqstr[10];


    unsigned int xxx;
    __declspec(local_mem) char dbgmsg[28];
    char msg_enq_rc[28] = "enq rc = -1. ";

__declspec(local_mem) char lmem_tmpstr[12];
__declspec(sram) char sram_tmpstr[12];

#ifdef DEBUG
    // debug stuff
//XXX    __declspec(sram) char tmpstr[12];
    char msg_got_pkt[12] = "got pkt ";
#endif

//XXX    ++ninq;
    ++npkts;

    // Read the buf handle from the input Ring
    onl_api_get_buf_handle(&buf_handle); 

    // Calculate the buffer descriptor location in SRAM
    bufDescPtr = onl_api_getBufferDescriptorPtr(buf_handle);

    // Read the buffer Descriptor from SRAM
    onl_api_readBufferDescriptor(bufDescPtr, &bufDescriptor);


//jmc5 notes:  need to store delay for each packet?
    dramBufferPtr = onl_api_getBufferPtr(buf_handle);
    ipv4HdrPtr = onl_api_getIpv4HdrPtr(dramBufferPtr, bufDescriptor.offset);
    onl_api_readIpv4Hdr(ipv4HdrPtr, &ipv4_hdr);       

//WWW
    out_port = (ring_in.uc_mc_bits >> 3) & 0x7;
    qid = ((out_port+1) << 13) | ring_in.qid,

    //memcpy_lmem_sram(&link_cap,(void*) (0x2333E0), 32);

    portRatePtr = 0x402333E0;
    portRatePtr = portRatePtr + ((sizeof(int)) * out_port);  //I thought it should be 4 * 8 * out_port

    sram_read(&link_cap, (__declspec(sram) void *) portRatePtr, 1, ctx_swap, &sram_sig);

    //calculate delay in cycles
    delay = 8 * 14 * ipv4_hdr.ip_len * 1000;
    delay = delay / (link_cap[0] * 683); 
    delay = delay * 100; 

    //delay = delay * (delayq.ninq); //???

    onl_api_int2str(delayq.ninq, ninqstr);
    strcat_lmem(delaymsg, ninqstr);
    onl_api_debug_message( msgNextBlock, delaymsg );

	//having the following line be delay - 11 * ipv4_hdr.ip_len; causes irresolvable register jam! (why?)
    delay = delay - ipv4_hdr.ip_len * 22/2; //should be following:  //56 * ipv4_hdr.ip_len / 5; 
	//to account for the time it actually takes the packet to traverse
	//the link at >1000 Mbps???

    x.tm2.lo = local_csr_read( local_csr_timestamp_low );
    x.tm2.hi = local_csr_read( local_csr_timestamp_high );
    xxx = delay >> 4; 
//delay is now already in cycles //msec2cycles( delay ) >> 4;
    z.tm = x.tm + xxx;
    
    ninq = delayq_enq( &delayq, z.tm, ring_in.buf_handle_lo24,
					out_port, qid, ring_in.l3_pkt_len );
    if( ninq == -1 ) {
	kenw_ultoa_sram( npkts, sram_tmpstr, 12 );
	memcpy_lmem_sram( dbgmsg, sram_tmpstr, 12 );
//ZZZ	memcpy_lmem_sram( dbgmsg, msg_enq_rc, 28 );
	onl_api_debug_message( msgNextBlock, dbgmsg );
	onl_api_drop();
	++ndrops;
	return;
    }

#ifdef DEBUG
//XXX    memcpy_lmem_sram( dbgmsg, msg_got_pkt, 20 );
//XXX    pkt_count++;
//XXX    kenw_ultoa_sram( pkt_count, tmpstr, 20 );
//XXX    strcat_lmem(dbgmsg, tmpstr);
//XXX    onl_api_debug_message( msgNextBlock, dbgmsg );
#endif
}

// User hook for control messages; intentionally empty in this plugin.
// It is not called anywhere in the code visible in this file.
void handle_msg_user(){}

// User initialisation hook, called from plugin_init().
// Only context 0 does any work: it clears npkts, releases the queue lock
// and builds the delay queue's free list, sending a debug message if
// delayq_init() fails.  With DEBUG defined it also zeroes the timestamp.
void plugin_init_user()
{
    __declspec(local_mem) char delayq_init_err[28] = "delayq_init error";

    if(ctx() != 0)	return;		// one-time setup belongs to context 0

    npkts = 0;				// #pkts seen by plugin
    delayq_lock = UNLOCKED;

    if (delayq_init( &delayq ) != 0 ) {
	onl_api_debug_message( msgNextBlock, delayq_init_err );
    }

#ifdef DEBUG
    __set_timestamp( 0 );
#endif
}



/**
	----------------------------------------------------------------
 @User: YOU SHOULD NOT NEED TO MAKE ANY CHANGES TO THE REST OF THIS FILE
	----------------------------------------------------------------
*/




// Fill ring_out from ring_in for the given next block.
// nextblock == QM selects the plugin_qm_data_out layout; any other value is
// treated as MUX and uses the plugin_mux_data_out layout.  In both cases the
// packet is assumed to be unicast: the output port is bits 3..5 of
// ring_in.uc_mc_bits and the qid is ((out_port+1) << 13) | ring_in.qid.
void default_format_out_data(unsigned int nextblock)
{
  __declspec(gp_reg) int out_port;

  // assume unicast
  out_port = (ring_in.uc_mc_bits >> 3) & 0x7;

  if(nextblock != QM) // assume MUX and ignore xscale data packets for now
  {
    ring_out.plugin_mux_data_out.out_port = out_port;
    
    ring_out.plugin_mux_data_out.qid = ((out_port+1) << 13) | ring_in.qid;

    // assume pass-through
    ring_out.plugin_mux_data_out.flags = 1;

    ring_out.plugin_mux_data_out.plugin_tag = ring_in.plugin_tag;
    ring_out.plugin_mux_data_out.in_port = ring_in.in_port;
    ring_out.plugin_mux_data_out.stats_index = ring_in.stats_index;
    ring_out.plugin_mux_data_out.l3_pkt_len = ring_in.l3_pkt_len;
    ring_out.plugin_mux_data_out.buf_handle_lo24 = ring_in.buf_handle_lo24;
  }
  else
  {
    // only needed by the disabled descriptor write below
    __declspec(gp_reg) dl_buf_handle_t buf_handle;
    __declspec(sram) unsigned int *buf_desc_ptr;
    __declspec(sram_write_reg) unsigned int sram_wr_regs[3];
    SIGNAL sram_sig;

    ring_out.plugin_qm_data_out.out_port = out_port;

    ring_out.plugin_qm_data_out.qid = ((out_port+1) << 13) | ring_in.qid;

    ring_out.plugin_qm_data_out.l3_pkt_len = ring_in.l3_pkt_len;
    ring_out.plugin_qm_data_out.buf_handle_lo24 = ring_in.buf_handle_lo24;

/*
    // disabled: also write the stats index, nh mac info and ethertype info
    // into the buffer descriptor
    onl_api_get_buf_handle(&buf_handle);
    buf_desc_ptr = (__declspec(sram) unsigned int*) (Dl_BufGetDesc(buf_handle) + 12);

    sram_wr_regs[0] = (ring_in.stats_index << 16) | (ring_in.nh_eth_daddr_hi32 >> 16);
    sram_wr_regs[1] = (ring_in.nh_eth_daddr_hi32 << 16) | (ring_in.nh_eth_daddr_lo16);
    sram_wr_regs[2] = ring_in.eth_type << 16;

    // write data to payload buf desc
    sram_write(sram_wr_regs, buf_desc_ptr, 3, ctx_swap, &sram_sig);
*/
  }
}

/* handle packets */
// One iteration of a packet thread's loop: fetch a packet from the input
// ring, prepare ring_out, let handle_pkt_user() queue (or drop) it, and then
// take part in the sink ordering without sending a packet.  The packet
// itself leaves later, from callback() via dl_sink_delay().
void handle_pkt()
{
  dl_source_packet(dlFromBlock);
  
  // format ring_out data based only on ring_in data
  default_format_out_data(dlNextBlock);

  handle_pkt_user();

  // nothing is forwarded here; only the ordering signal is passed on
  dl_sink_nopacket();
}


/* handle control messages */
// op codes:
//   set:
//	delay=	set delay to X msec (e.g., "delay= 50")
//   get:
//	=delay	get delay (msec)
//	=ninq	get ninq (#pkts in queue)
//	=ndrops	get ndrops (#pkts dropped)
//	=npkts	get npkts seen by plugin (stored in plugin counter 0)
//   miscellaneous:
//	reset	reset npkts and ndrops counters
//
void handle_msg()
{
    // assume messages are at most 8 words for now
    __declspec(gp_reg) unsigned int i;
    __declspec(gp_reg) unsigned int message[8];
    __declspec(gp_reg) onl_api_ctrl_msg_hdr hdr;
    __declspec(local_mem) char inmsgstr[28];			// inbound
    __declspec(local_mem) char outmsgstr[28] = "";		// outbound
    __declspec(local_mem) char lmem_tmpstr[8];
    __declspec(sram) char *valptr;
    __declspec(sram) char sram_inmsgstr[28];
    __declspec(sram) char sram_tmpstr8[8];
    __declspec(sram) char sram_tmpstr12[12];
    __declspec(sram) char sram_space[2] = " ";
    __declspec(sram) char sram_outmsgstr[32] = "";		// outbound

    char GET_ninq[8]	= "=ninq";
    char GET_npkts[8]	= "=npkts";
    char GET_ndrops[8]	= "=ndrops";
    char RESET[8]	= "reset";
    char BAD_op_err[8]	= "BAD OP";		// error msgs
    char NO_arg_err[8]	= "NO ARG";

//XXX    for(i=0; i<8; ++i) { message[i] = 0; }
    // to get rid of the compiler error:  "Incorrect use of register variable
    // message - check for & operator or non-constant buffer index"
    message[0] = 0;
    message[1] = 0;
    message[2] = 0;
    message[3] = 0;
    message[4] = 0;
    message[5] = 0;
    message[6] = 0;
    message[7] = 0;

    dl_source_message(msgFromBlock, message);

    hdr.value = message[0];
    if( hdr.type != CM_CONTROLMSG )	return;
    if( hdr.response_requested != 1 )	return;

    onl_api_intarr2str( &message[1], inmsgstr );

    memcpy_sram_lmem( sram_inmsgstr, inmsgstr, 28 );

    if( strncmp_sram(sram_inmsgstr, GET_npkts, 6) == 0 ) {
	kenw_ultoa_sram( npkts, sram_tmpstr8, 8 );
	memcpy_lmem_sram( lmem_tmpstr, sram_tmpstr8, 8 );
    	strcat_lmem( outmsgstr, lmem_tmpstr );
    } else if( strncmp_sram(sram_inmsgstr, GET_ndrops, 6) == 0 ) {
	kenw_ultoa_sram( ndrops, sram_tmpstr8, 8 );
	memcpy_lmem_sram( lmem_tmpstr, sram_tmpstr8, 8 );
    	strcat_lmem( outmsgstr, lmem_tmpstr );
    } else if( strncmp_sram(sram_inmsgstr, GET_ninq, 5) == 0 ) {
	kenw_ultoa_sram( delayq.ninq, sram_tmpstr8, 8 );
	memcpy_lmem_sram( lmem_tmpstr, sram_tmpstr8, 8 );
    	strcat_lmem( outmsgstr, lmem_tmpstr );
    } else if( strncmp_sram(sram_inmsgstr, RESET, 5) == 0 ) {
	npkts = 0;
	ndrops = 0;
    } else {
    	strcat_lmem( outmsgstr, memcpy_lmem_sram(lmem_tmpstr,BAD_op_err,8) );
    }

    if( onl_api_str2intarr(outmsgstr, &message[1]) < 0 )	return;

    hdr.type = CM_CONTROLMSGRSP;
    hdr.response_requested = 0;
    hdr.num_words = 7;
    message[0] = hdr.value;

    dl_sink_message(msgNextBlock, message);
}

/* handle periodic functionality */
void callback()
{
    union tm_tag y;

#ifdef DEBUG
    __declspec(local_mem) char dbgmsg[28];
//XXX    __declspec(local_mem) char tmpstr[12];

    char msg_ninq[8] = "ninq = ";
    char msg_delayq_ninq[12] = "dq.ninq = ";
    char msg_fwdpkt[8] = "fwdpkt";
#endif

//jmc5 - need to look through whole queue for one that is ready to leave?
    if ( delayq.ninq > 0 ) {
	y.tm2.lo = local_csr_read( local_csr_timestamp_low );
	y.tm2.hi = local_csr_read( local_csr_timestamp_high );
	if ( y.tm >= (delayq.hd->time.tm)){  //time to leave

#ifdef DEBUG
memcpy_lmem_sram( dbgmsg, msg_fwdpkt, 8 );
onl_api_debug_message( msgNextBlock, dbgmsg );
#endif

#ifdef DEBUG
//XXX	    onl_api_int2str( y.tm2.hi, tm_str );
//XXX	    strcat_lmem( dbgmsg, tm_str );
//XXX
//XXX	    strcat_lmem(dbgmsg, dot_str);
//XXX	    onl_api_int2str( y.tm2.lo, tm_str );
//XXX	    strcat_lmem( dbgmsg, tm_str );
//XXX
//XXX	    onl_api_debug_message( msgNextBlock, dbgmsg );
#endif

	    dl_sink_delay( );
	} else {
	    sleep( SLEEP_CYCLES );
	}
    } else {
	sleep( SLEEP_CYCLES );
    }
}


/* take care of any setup that needs to be done before processing begins */
// Sets the next block to the Queue Manager, picks the plugin id and the
// packet / message rings from the microengine number, then runs the user
// hook.  Microengines 0x7, 0x10, 0x11, 0x12, 0x13 map to plugins 0..4; any
// other microengine number gets the same settings as 0x7.
void plugin_init()
{
  /* the default next block is the Queue Manager */
  dlNextBlock = QM;

  /* by default, get packets and get and put control messages from input rings
   * based on which microengine we are currently running on; this assumes a
   * default one to one mapping */
  switch(__ME())
  {
    case 0x10:
      pluginId = 1;
      dlFromBlock  = PACKET_IN_RING_1;
      msgFromBlock = MESSAGE_IN_RING_1;
      msgNextBlock = MESSAGE_OUT_RING_1;
      break;

    case 0x11:
      pluginId = 2;
      dlFromBlock  = PACKET_IN_RING_2;
      msgFromBlock = MESSAGE_IN_RING_2;
      msgNextBlock = MESSAGE_OUT_RING_2;
      break;

    case 0x12:
      pluginId = 3;
      dlFromBlock  = PACKET_IN_RING_3;
      msgFromBlock = MESSAGE_IN_RING_3;
      msgNextBlock = MESSAGE_OUT_RING_3;
      break;

    case 0x13:
      pluginId = 4;
      dlFromBlock  = PACKET_IN_RING_4;
      msgFromBlock = MESSAGE_IN_RING_4;
      msgNextBlock = MESSAGE_OUT_RING_4;
      break;

    case 0x7:
    default:  // unknown microengines share plugin 0's settings
      pluginId = 0;
      dlFromBlock  = PACKET_IN_RING_0;
      msgFromBlock = MESSAGE_IN_RING_0;
      msgNextBlock = MESSAGE_OUT_RING_0;
      break;
  }

  plugin_init_user(); // user hook
}


/* entry point */
// Every context runs the initialisation and then enters the endless loop
// that matches its role: packet handling, control messages (only when
// MESSAGE_THREAD is defined) or the periodic callback.  A context with none
// of these roles returns after initialisation.
void main()
{
  int me_ctx;

  /* do initialization */
  plugin_init();
  dl_sink_init();
  dl_source_init();

  /* get the current thread's context number (0-7) */
  me_ctx = ctx();

  /* each loop below never exits, so at most one of them is entered */
  if(me_ctx >= FIRST_PACKET_THREAD && me_ctx <= LAST_PACKET_THREAD)
  {
    for(;;)
    {
      handle_pkt();
    }
  }

#ifdef MESSAGE_THREAD
  if(me_ctx == MESSAGE_THREAD)
  {
    for(;;)
    {
      handle_msg();
    }
  }
#endif

  if(me_ctx == CALLBACK_THREAD)
  {
    for(;;)
    {
      callback();
    }
  }
}

// Same as dl_sink_packet() but don't do any signalling and assume pkt
// goes to QM
//
void dl_sink_delay( void ) {
    plugin_out_data my_ring_out; // ring data to next block
    int		rc;
    __declspec(local_mem) char dbgmsg[28];
    char msg_bad_pop_err[8]	= "BAD pop";


    my_ring_out.plugin_qm_data_out.out_port	= delayq.hd->out_port;
    my_ring_out.plugin_qm_data_out.qid		= delayq.hd->qid;
    my_ring_out.plugin_qm_data_out.l3_pkt_len	= delayq.hd->l3_pkt_len;
    my_ring_out.plugin_qm_data_out.buf_handle_lo24 = delayq.hd->buf_handle;

    scr_ring_put_buffer_3word(PLUGIN_TO_QM_RING, my_ring_out.i, 0);

    rc = delayq_pop( &delayq );
    if( rc != 0 ) {
	memcpy_lmem_sram( dbgmsg, msg_bad_pop_err, 8 );
	onl_api_debug_message( msgNextBlock, dbgmsg );
    }
}

// --------------------------------------------------------------------------
// queueing functions
// --------------------------------------------------------------------------

// initialize delay queue
// 	Empties the queue and chains MAX_QUEUE_SZ items into its free list.
// 	The items live at a fixed address chosen by the global pluginId:
// 	0xC0100000 for plugin 0, then one 0x100000-byte region per plugin up
// 	to 0xC0500000 for plugin 4.
// 	return 0 if OK; -1 if pluginId is not 0..4 (queue left untouched)
// NOTE(review): MAX_QUEUE_SZ items must fit into one 0x100000-byte region;
//	that holds only if sizeof(struct delay_item_tag) is below 30 bytes --
//	confirm the padded struct size on the target.
int
delayq_init( __declspec(shared, sram) struct delayq_tag *qptr ) {
    int		i;
    int		K = MAX_QUEUE_SZ-1;	// index of the last item
    struct delay_item_tag *item_ptr;

    if ( pluginId > 4 )		return -1;
    item_ptr = (struct delay_item_tag *) (0xC0100000 + (pluginId << 20));

    qptr->free_hd = item_ptr;
    qptr->hd = qptr->tl = 0;
    qptr->ninq = 0;

    // link item i to item i+1 for i = 0..K-1 ...
    for (i=0; i<K; i++) {
    	item_ptr->next = item_ptr+1;
	++item_ptr;
    }
    // ... and terminate the list at item K
    item_ptr->next = 0;

    return 0;
}

// insert item at end of queue
// 	return number of items if OK; else -1 (free list exhausted)
// 	qptr       : queue to append to
// 	time       : departure time stored in the item
// 	buf_handle, out_port, qid, l3_pkt_len : copied into the item unchanged
// The queue is protected by the global flag delayq_lock: the caller spins
// (yielding with ctx_swap) until the flag is free.  The flag is released on
// EVERY path, including the failure path, so a full queue cannot leave it
// set and block all later enqueues and pops.
int
delayq_enq(	__declspec(shared, sram) struct delayq_tag *qptr,
		long long time,
		unsigned int buf_handle, 
		unsigned int out_port, 
		unsigned int qid, 
		unsigned int l3_pkt_len ) {
    struct delay_item_tag	*item;
    int				rc;

    while( delayq_lock == LOCKED ) {
	ctx_swap();
    }
    delayq_lock = LOCKED;

    item = delayq_alloc( qptr );
    if( item == 0 ) {
	rc = -1;			// no free item
    } else {
	item->time.tm = time;
	item->buf_handle = buf_handle;
	item->out_port = out_port;
	item->qid = qid;
	item->l3_pkt_len = l3_pkt_len;
	item->next = 0;

	if( qptr->ninq == 0 )	qptr->hd = item;
	else			qptr->tl->next = item;
	qptr->tl = item;

	++(qptr->ninq);
	rc = qptr->ninq;		// read while the lock is still held
    }

    delayq_lock = UNLOCKED;
    return rc;
}

// pop front of list
// 	Removes the head item and returns it to the free list.
// 	return 0 if OK; -1 if the queue is empty
// Uses the same delayq_lock protocol as delayq_enq().  The flag is released
// on every path, including the empty-queue path.
int
delayq_pop( __declspec(shared, sram) struct delayq_tag *qptr ) {
    struct delay_item_tag	*item;
    int				rc;

    while( delayq_lock == LOCKED ) {
	ctx_swap();
    }
    delayq_lock = LOCKED;

    if( qptr->ninq == 0 ) {
	rc = -1;			// nothing to pop
    } else {
	item = qptr->hd;
	qptr->hd = item->next;
	--(qptr->ninq);
	if( qptr->ninq == 0 )	qptr->tl = 0;
	delayq_free( qptr, item );
	rc = 0;
    }

    delayq_lock = UNLOCKED;
    return rc;
}

// allocate an item
// 	Unlinks and returns the first item of the queue's free list, or
// 	returns 0 if the free list is empty.  The returned item's fields are
// 	not cleared.
struct delay_item_tag *
delayq_alloc( struct delayq_tag *delayq ) {
    struct delay_item_tag *first;

    first = delayq->free_hd;
    if( first != 0 ) {
	delayq->free_hd = first->next;
    }
    return first;
}

// free an item
// 	Pushes 'item' onto the front of the queue's free list.  A null item
// 	is ignored.
void
delayq_free( struct delayq_tag *delayq, struct delay_item_tag *item ) {
    if( item != 0 ) {
	item->next = delayq->free_hd;
	delayq->free_hd = item;
    }
}

#endif	// _PLUGIN_DELAY_C
