/* modu64.c - calculate unsigned 64:32bit division 32bit remainder
   designed to be used with cpu without native 64bit math (such as 68060)

   this source use non-ansi-c datatype 'long long'.

   Written by Harry "Piru" Sintonen.
*/

#include <stdio.h>

#define DEB

typedef unsigned short int uint16;
typedef unsigned long int uint32;
typedef unsigned long long uint64;

uint64 mulu64(uint32 multiplier,
              uint32 multiplicand)
{
#if 1
  uint16 carry;
  uint32 bd, ad, bc, ac;

  if ((multiplier == 0) || (multiplicand == 0)) return 0;

  /* [(a<<16)+b] × [(c<<16)+d] = ac<<32 + (ad+bc)<<16 + bd
  */

  bd = ((uint16) multiplier)           * ((uint16) multiplicand);
  ad = ((uint16) (multiplicand >> 16)) * ((uint16) multiplier);
  bc = ((uint16) (multiplier >> 16))   * ((uint16) multiplicand);
  ac = ((uint16) (multiplier >> 16))   * ((uint16) (multiplicand >> 16));

  /* swap
  */
  bd = bd << 16 | bd >> 16;

  /* Add ad to bd's most significant word
  */
  carry = bd; carry += ad;
  ac += ((uint16) bd > carry); bd = (bd & 0xffff0000) | carry;

  /* Add bc to bd's most significant word
  */
  carry = bd; carry += bc;
  ac += ((uint16) bd > carry); bd = (bd & 0xffff0000) | carry;

  ad >>= 16;
  bc >>= 16;
  ad += ac;
  bd = bd << 16 | bd >> 16;    /* reorder bd */
  ad += bc;                    /* most signifcant longword */

  return (((uint64) ad) << 32) | bd;

#else
  return ((uint64) multiplier) * multiplicand;
#endif
}

uint32 modu64(uint32 divisor,
              uint32 dividend_hi,
              uint32 dividend_lo)
{
  char shift, term;
#ifdef DIVU64
  uint32 quatient;
#endif

  printf("%08lx %08lx : %08lx\n", dividend_hi, dividend_lo, divisor);
  printf("quatient: %08lx\n", (uint32) (((uint64) dividend_hi << 32 | dividend_lo) / divisor));
  printf("remainder1: %08lx\n", (uint32) (((uint64) dividend_hi << 32 | dividend_lo) % divisor));

  /* if upper 32bits are zero
  */
  if (dividend_hi == 0)
  {
    /* if lower 32bit are zero, remainder is zero
    */
    if (dividend_lo == 0) return 0;

    /* if divisor > dividend then remainder is dividend
    */
    if (divisor > dividend_lo) return dividend_lo;

    /* else use regular 32bit math
    */
    return dividend_lo % divisor;
  }

  /* ok the dividend is full 64bit value, we need to do more
     work.
  */

  /* first check if the division will overflow (and emulate
     the condition similar to 68020+)
  */
#if 1
  if (divisor < dividend_hi) return dividend_hi;
#endif

  /* if divisor < 65536 we can use simple math and remainder
     is < 65536 then too.
  */
  if (divisor < 65536)
  {
    return (((dividend_hi << 16 | dividend_lo >> 16) %
             (uint16) divisor) << 16 |
            (dividend_lo & 0x0000ffff)) % (uint16) divisor;
  }


  /* oh well, in this case there is no shortcut, we need to do
     the full thing.
  */

  shift = 0; term = 0;

  /* pre-adjust the values
  */
  while (!(divisor & 0x80000000))
  {
    shift++;
    divisor <<= 1;
  }
  dividend_hi = dividend_hi << shift | dividend_lo >> (32 - shift);
  dividend_lo <<= shift;

#ifdef DEB
  printf("divisor: %08lx\n", divisor);
  printf("dividend_hi: %08lx dividend_lo: %08lx\n",
         dividend_hi, dividend_lo);
#endif

  /* main loop (only run twice)
  */
  for (;;)
  {
    uint16 cnt;
    uint32 diff, prev_hi, prev_lo;
    uint64 delta;

    /* determine the count
    */
    if ((dividend_hi >> 16) == (divisor >> 16))
    {
      cnt = 0xffff;
    }
    else
    {
      cnt = dividend_hi / ((uint16) (divisor >> 16));
    }
#ifdef DEB
    printf("cnt1: %u\n", cnt);
#endif

    for (;; cnt--)
    {
      diff = dividend_hi - ((uint16) (divisor >> 16)) * cnt;
      if (diff >> 16) break;
      diff = diff << 16 | dividend_lo >> 16;
      if (diff >= (((uint16) divisor) * cnt)) break;
    }
#ifdef DEB
    printf("cnt2: %u\n", cnt);
#endif

    delta = mulu64(cnt << 16, divisor);

    /* decrement the delta, use sub & subx
    */
    prev_lo = dividend_lo;
    dividend_lo -= (uint32) delta;
    prev_hi = dividend_hi;
    dividend_hi -= (uint32) (delta >> 32) + (dividend_lo > prev_lo);

    /* carry?
    */
    if (dividend_hi > prev_hi)
    {
#ifndef DIVU64
      cnt--;
#endif
      /* add due to carry, use add & addx
      */
      prev_lo = dividend_lo;
      dividend_lo += divisor << 16;
      dividend_hi += (divisor >> 16) + (dividend_lo < prev_lo);
    }

    if (term) break;

#ifdef DIVU64
    /* set quatient upper 16bits
    */
    quatient = cnt << 16;
#endif

    dividend_hi = dividend_hi << 16 | dividend_lo >> 16;
    dividend_lo <<= 16;
#ifdef DEB
    printf("dividend_hi: %08lx dividend_lo: %08lx **\n",
           dividend_hi, dividend_lo);
#endif

    term = 1;
  }

#ifdef DIVU64
  /* set quatient lower 16bits
  */
  quatient |= cnt;
#endif

  dividend_lo = dividend_lo >> 16 | dividend_hi << 16;
  dividend_hi = dividend_hi << 16 | dividend_hi >> 16;
#ifdef DEB
  printf("dividend_hi: %08lx dividend_lo: %08lx\n",
          dividend_hi, dividend_lo);
#endif
  /* shift the result back, return remainder
  */

  printf("dividend_hi << (32 - shift): %08lx\n", dividend_hi << (32 - shift));
  printf("dividend_lo >> shift: %08lx\n", dividend_lo >> shift);

  return dividend_hi << (32 - shift) | dividend_lo >> shift;
}



int main(void)
{

  printf("remainder2: %08lx\n",
         modu64(3150505, 0xA, 0x8AD920F9));

  printf("remainder2: %08lx\n",
         modu64(0x33333333, 0x12345678, 0x9abcdef0));

  printf("remainder2: %08lx\n",
         modu64(434316536, 132432432, 456732131));

  printf("remainder2: %08lx\n",
         modu64(14536, 13243, 3242432423UL));

  printf("remainder2: %08lx\n",
         modu64(65536, 1, 456732131));

  printf("remainder2: %08lx\n",
         modu64(652536323, 143232321, 456732131));

  printf("remainder2: %08lx\n",
         modu64(4052536323UL, 3943232321UL, 456732131));

  return 1;
}