
sstring.h

#if 0
#ifndef _I386_STRING_I486_H_
#define _I386_STRING_I486_H_
#if defined(__OPTIMIZE__) && defined(__GNUC__) && defined(__i386__)
/*
 * This string-include defines all string functions as inline
 * functions. Use gcc. It also assumes ds=es=data space, which should be
 * normal. Most of the string functions are rather heavily hand-optimized;
 * see especially strtok, strstr and str[c]spn. They should work, but are
 * not very easy to understand. Everything is done entirely within the
 * register set, making the functions fast and clean.
 *
 *          Copyright (C) 1991, 1992 Linus Torvalds
 *          Revised and optimized for i486/pentium
 *          1994/03/15 by Alberto Vignani/Davide Parodi @crf.it
 *
 *    Split into 2 CPU specific files by Alan Cox to keep #ifdef noise down.
 *
 *    Revised and optimized again by Jan Hubicka  (1997/11/16)
 *    (please report bugs to hubicka@paru.cas.cz)
 *
 *    The memset and memcpy routines seem to be consistently faster on the
 *    486 and Pentium, but on the Pentium MMX they are sometimes a bit
 *    slower (5-10%); because of less strict register allocation they
 *    produce better code.
 */


#define __HAVE_ARCH_MEMCPY
#define memcpy(d,s,count) \
(__builtin_constant_p(count) ? \
 __memcpy_c((d),(s),(count)) : \
 __memcpy_g((d),(s),(count)))

/*
 *    These ought to get tweaked to do some cache priming.
 */


/* This implementation of memcpy is designed for moveoldpoints in
 * mkrealloctables. It is expected to work well for both small and large
 * sizes.
 *
 * Small (1-10) and medium (around 300) sizes seem to be the important
 * ones for XaoS, so the implementation is not super fast for large sizes,
 * but my experiments don't show large improvements in speed there anyway.
 *
 * We use rep movsX operations (they work well on the PPro and don't seem
 * to be too bad on the Pentium) and expect the direction flag to be
 * clear (cld). Hopefully this will not cause problems.
 *
 * The idea was to use C code where possible and let GCC do the register
 * allocation.
 */
extern inline void *__memcpy_g (void *to, const register void *from,
                        register size_t n);
extern inline void *
__memcpy_g (void *to, const register void *from, register size_t n)
{
  register void *tmp = (void *) to;
  if (n >= 7)
    {
      register int c = (-(int) to) & 3;
      n -= c;
      __asm__ __volatile__ (  /*Align the destination */
                       "rep\n\tmovsb":"=c" (c), "=D" (tmp),
                       "=S" (from):"c" (c), "D" ((long) tmp),
                       "S" ((long) from):"memory");
      c = n >> 2;
      __asm__ __volatile__ (  /*Copy the main body */
                       "rep\n\tmovsl":"=c" (c), "=D" (tmp),
                       "=S" (from):"c" (c), "D" ((long) tmp),
                       "S" ((long) from):"memory");
      n &= 3;
    }
  __asm__ __volatile__ ("rep\n\tmovsb":"=c" (n), "=D" (tmp),
                  "=S" (from):"c" (n), "D" ((long) tmp),
                  "S" ((long) from):"memory");
  return (to);
}
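
/* For reference, the same three-phase strategy in portable C: copy single
 * bytes until the destination is 4-byte aligned, move the bulk as 32-bit
 * words (unsigned long is 4 bytes here, since this header is i386-only),
 * then finish the 0-3 byte tail. This is only an illustrative sketch; the
 * __memcpy_g_sketch name is made up and nothing calls it, XaoS uses the
 * asm version above. */
extern inline void *__memcpy_g_sketch (void *to, const void *from, size_t n);
extern inline void *
__memcpy_g_sketch (void *to, const void *from, size_t n)
{
  char *d = (char *) to;
  const char *s = (const char *) from;
  if (n >= 7)
    {
      size_t head = (-(size_t) d) & 3;  /* bytes until d is 4-aligned */
      size_t words;
      n -= head;
      while (head--)
        *d++ = *s++;            /* align the destination */
      for (words = n >> 2; words--; d += 4, s += 4)
        *(unsigned long *) d = *(const unsigned long *) s;  /* main body */
      n &= 3;
    }
  while (n--)
    *d++ = *s++;                /* trailing bytes */
  return to;
}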

/*
 * This looks horribly ugly, but the compiler can optimize it totally,
 * as the count is constant.
 */

#define COMMON(x) \
__asm__ __volatile__ ( \
      "\n.align 4\n" \
      "1:\tmovl (%2),%0\n\t" \
      "addl $4,%2\n\t" \
      "movl %0,(%1)\n\t" \
      "addl $4,%1\n\t" \
      "decl %3\n\t" \
      "jnz 1b\n" \
      x \
      :"=r" (dummy1), "=r" (tmp), "=r" (from), "=r" (dummy2)  \
      :"1" (tmp), "2" (from), "3" (n/4) \
      :"memory"); \
return (to);

extern inline void *__memcpy_c (void *to, const void *from, size_t n);
extern inline void *
__memcpy_c (void *to, const void *from, size_t n)
{
  if (n < 24)
    {
      if (n >= 4)
        ((unsigned long *) to)[0] = ((const unsigned long *) from)[0];
      if (n >= 8)
        ((unsigned long *) to)[1] = ((const unsigned long *) from)[1];
      if (n >= 12)
        ((unsigned long *) to)[2] = ((const unsigned long *) from)[2];
      if (n >= 16)
        ((unsigned long *) to)[3] = ((const unsigned long *) from)[3];
      if (n >= 20)
        ((unsigned long *) to)[4] = ((const unsigned long *) from)[4];
      switch ((unsigned int) (n % 4))
      {
      case 3:
        ((unsigned short *) to)[n / 2 - 1] =
          ((const unsigned short *) from)[n / 2 - 1];
        ((unsigned char *) to)[n - 1] =
          ((const unsigned char *) from)[n - 1];
        return to;
      case 2:
        ((unsigned short *) to)[n / 2 - 1] =
          ((const unsigned short *) from)[n / 2 - 1];
        return to;
      case 1:
        ((unsigned char *) to)[n - 1] =
          ((const unsigned char *) from)[n - 1];
        /* fall through */
      case 0:
        return to;
      }
    }
  {
    register void *tmp = (void *) to;
    register int dummy1, dummy2;
    switch ((unsigned int) (n % 4))
      {
      case 0:
        COMMON ("");
      case 1:
        COMMON ("movb (%2),%b0 ; movb %b0,(%1)");
      case 2:
        COMMON ("movw (%2),%w0 ; movw %w0,(%1)");
      case 3:
        COMMON ("movw (%2),%w0 ; movw %w0,(%1)\n\t"
                "movb 2(%2),%b0 ; movb %b0,2(%1)");
  }
  return to;
}
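
/* Usage sketch for the dispatching memcpy macro above. With a literal
 * count, __builtin_constant_p(count) folds to 1 at compile time and the
 * macro expands to the unrolled __memcpy_c; a count known only at run
 * time selects the rep-movs based __memcpy_g. The function below is a
 * hypothetical caller, for illustration only. */
extern inline void __memcpy_dispatch_example (void *d, const void *s,
                        size_t n);
extern inline void
__memcpy_dispatch_example (void *d, const void *s, size_t n)
{
  memcpy (d, s, 16);            /* constant count -> __memcpy_c */
  memcpy (d, s, n);             /* runtime count  -> __memcpy_g */
}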

#undef COMMON


#define __HAVE_ARCH_MEMMOVE
extern inline void *memmove (void *dest, const void *src, size_t n);
extern inline void *
memmove (void *dest, const void *src, size_t n)
{
  register void *tmp = (void *) dest;
  if (dest < src)
    __asm__ __volatile__ ("cld\n\t" "rep\n\t" "movsb":      /* no output */
                    :"c" (n), "S" (src), "D" (tmp):"cx", "si", "di",
                    "memory");
  else
__asm__ __volatile__ ("std\n\t" "rep\n\t" "movsb\n\t" "cld":      /* no output */
: "c" (n), "S" (n - 1 + (const char *) src), "D" (n - 1 + (char *) tmp):"cx", "si", "di", "memory");
  return dest;
}
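
/* Why the direction test matters: with dest > src and overlapping
 * buffers, a forward copy would overwrite source bytes before they are
 * read, so that case has to run backwards, which is what the
 * std/movsb/cld sequence above does. A portable sketch of the same
 * logic (the __memmove_sketch name is hypothetical, for illustration): */
extern inline void *__memmove_sketch (void *dest, const void *src, size_t n);
extern inline void *
__memmove_sketch (void *dest, const void *src, size_t n)
{
  char *d = (char *) dest;
  const char *s = (const char *) src;
  if (d < s)
    while (n--)
      *d++ = *s++;              /* forward copy is safe */
  else
    while (n--)
      d[n] = s[n];              /* copy backwards when dest > src */
  return dest;
}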

#define memcmp __builtin_memcmp

#define __HAVE_ARCH_MEMCHR
extern inline void *memchr (const void *cs, int c, size_t count);
extern inline void *
memchr (const void *cs, int c, size_t count)
{
  register void *__res;
  if (!count)
    return NULL;
  __asm__ __volatile__ ("cld\n\t"
                  "repne\n\t"
                  "scasb\n\t"
                  "je 1f\n\t"
                  "movl $1,%0\n"
                  "1:\tdecl %0":"=D" (__res):"a" (c), "D" (cs),
                  "c" (count):"cx");
  return __res;
}
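
/* How the je/movl/decl sequence above produces the return value: when
 * "repne scasb" stops on a match, %edi points one past the matching
 * byte, so the "decl" yields the match itself; when nothing matched,
 * the code first loads 1 so that the same "decl" produces NULL. A
 * portable sketch of the contract (hypothetical helper, illustration
 * only): */
extern inline void *__memchr_sketch (const void *cs, int c, size_t count);
extern inline void *
__memchr_sketch (const void *cs, int c, size_t count)
{
  const unsigned char *p = (const unsigned char *) cs;
  while (count--)
    {
      if (*p == (unsigned char) c)
        return (void *) p;      /* pointer to the first match */
      p++;
    }
  return NULL;                  /* no match in the area */
}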



#define __HAVE_ARCH_MEMSET
#define memset(s,c,count) \
(__builtin_constant_p(c) ? \
 (__builtin_constant_p(count) ? \
  __memset_cc((s),(c),(count)) : \
  __memset_cg((s),(c),(count))) : \
 (__builtin_constant_p(count) ? \
  __memset_gc((s),(c),(count)) : \
  __memset_gg((s),(c),(count))))




extern inline void *__memset_cg (void *s, char c, size_t count);
extern inline void *
__memset_cg (void *s, char c, size_t count)
{
  int tmp2;
  register void *tmp = (void *) s;
  __asm__ __volatile__ ("shrl $1,%%ecx\n\t"
                  "rep\n\t"
                  "stosw\n\t"
                  "jnc 1f\n\t"
                  "movb %%al,(%%edi)\n"
                  "1:":"=c" (tmp2), "=D" (tmp):"c" (count), "D" (tmp),
                  "a" (0x0101U * (unsigned char) c):"memory");
  return s;
}

extern inline void *__memset_gg (void *s, char c, size_t count);
extern inline void *
__memset_gg (void *s, char c, size_t count)
{
  register void *tmp = (void *) s;
  int tmp2;
  __asm__ __volatile__ ("movb %%al,%%ah\n\t"
                  "shrl $1,%%ecx\n\t"
                  "rep\n\t"
                  "stosw\n\t"
                  "jnc 1f\n\t"
                  "movb %%al,(%%edi)\n"
                  "1:":"=c" (tmp2), "=D" (tmp):"c" (count), "D" (tmp),
                  "a" (c):"memory");
  return s;
}

/*
 * These non-rep routines are not much faster (they are slower for small
 * strings), but they allow better register allocation.
 */
#define COMMON(x) \
__asm__ __volatile__ ( \
      "\n.align 4\n" \
      "1:\tmovl %k2,(%k0)\n\t" \
      "addl $4,%k0\n\t" \
      "decl %k1\n\t" \
      "jnz 1b\n" \
      x \
      :"=r" (tmp), "=r" (dummy) \
      :"q" ((unsigned) pattern),  "0"  (tmp), "1" (count/4) \
      :"memory"); \
return s;

extern inline void *__memset_cc (void *s, unsigned long pattern,
                         size_t count);
extern inline void *
__memset_cc (void *s, unsigned long pattern, size_t count)
{
  pattern = ((unsigned char) pattern) * 0x01010101UL;
  if (count < 24)
    {
      /* Handle small values manually since they are incredibly slow */

      if (count >= 4)
        *(unsigned long *) s = pattern;
      if (count >= 8)
        ((unsigned long *) s)[1] = pattern;
      if (count >= 12)
        ((unsigned long *) s)[2] = pattern;
      if (count >= 16)
        ((unsigned long *) s)[3] = pattern;
      if (count >= 20)
        ((unsigned long *) s)[4] = pattern;
      switch ((unsigned int) (count % 4))
      {
      case 3:
        ((unsigned short *) s)[count / 2 - 1] = pattern;
        ((unsigned char *) s)[count - 1] = pattern;
        return s;
      case 2:
        ((unsigned short *) s)[count / 2 - 1] = pattern;
        return s;
      case 1:
        ((unsigned char *) s)[count - 1] = pattern;
        /* fall through */
      case 0:
        return s;
      }
    }
  else
    {
      register void *tmp = (void *) s;
      register int dummy;
      switch ((unsigned int) (count % 4))
      {
      case 0:
        COMMON ("");
      case 1:
        COMMON ("movb %b2,(%0)");
      case 2:
        COMMON ("movw %w2,(%0)");
      case 3:
        COMMON ("movw %w2,(%0)\n\tmovb %b2,2(%0)");
      }
    }
  return s;
}
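
/* The multiply at the top of __memset_cc is a byte-replication trick:
 * 0x01010101UL * 0xAB yields 0xABABABABUL, the fill byte copied into
 * all four byte lanes, so every 32-bit store writes four bytes of c.
 * The shift form below computes the same word; it is a hypothetical
 * helper for illustration (compare __memset_long at the end of this
 * file, which uses exactly these shifts). */
extern inline unsigned long __replicate_byte_sketch (unsigned char c);
extern inline unsigned long
__replicate_byte_sketch (unsigned char c)
{
  unsigned long fill = c;       /* 000000cc */
  fill |= fill << 8;            /* 0000cccc */
  fill |= fill << 16;           /* cccccccc */
  return fill;                  /* == c * 0x01010101UL */
}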

extern inline void *__memset_gc (void *s, unsigned long pattern,
                         size_t count);
extern inline void *
__memset_gc (void *s, unsigned long pattern, size_t count)
{
  if (count < 4)
    {
      if (count > 1)
        /* at most two bytes are stored: copy the fill byte into %ah */
        __asm__ ("movb %b0,%h0\n\t"
                 : "=q" (pattern) : "0" ((unsigned) pattern));
      switch ((unsigned int) (count))
      {
      case 3:
        ((unsigned short *) s)[0] = pattern;
        ((unsigned char *) s)[2] = pattern;
        return s;
      case 2:
        *((unsigned short *) s) = pattern;
        return s;
      case 1:
        *(unsigned char *) s = pattern;
        /* fall through */
      case 0:
        return s;
      }
    }

__asm__ ("movb %b0,%h0\n\t" "pushw %w0\n\t" "shll $16,%k0\n\t" "popw %w0\n": "=q" (pattern):"0" ((unsigned)
       pattern));

  if (count < 24)
    {
      /* Handle small values manually since they are incredibly slow */

      *(unsigned long *) s = pattern;
      if (count >= 8)
        ((unsigned long *) s)[1] = pattern;
      if (count >= 12)
        ((unsigned long *) s)[2] = pattern;
      if (count >= 16)
        ((unsigned long *) s)[3] = pattern;
      if (count >= 20)
        ((unsigned long *) s)[4] = pattern;
      switch ((unsigned int) (count % 4))
      {
      case 3:
        ((unsigned short *) s)[count / 2 - 1] = pattern;
        ((unsigned char *) s)[count - 1] = pattern;
        return s;
      case 2:
        ((unsigned short *) s)[count / 2 - 1] = pattern;
        return s;
      case 1:
        ((unsigned char *) s)[count - 1] = pattern;
        /* fall through */
      case 0:
        return s;
      }
    }
  else
    {
      register void *tmp = (void *) s;
      register int dummy;
      switch ((unsigned int) (count % 4))
      {
      case 0:
        COMMON ("");
      case 1:
        COMMON ("movb %b2,(%0)");
      case 2:
        COMMON ("movw %w2,(%0)");
      case 3:
        COMMON ("movw %w2,(%0)\n\tmovb %b2,2(%0)");
      }
    }
  return s;
}

#undef COMMON


/*
 * find the first occurrence of byte 'c', or 1 past the area if none
 */
#define __HAVE_ARCH_MEMSCAN
extern inline void *memscan (void *addr, int c, size_t size);
extern inline void *
memscan (void *addr, int c, size_t size)
{
  if (!size)
    return addr;
  __asm__ __volatile__ ("cld\n\t"
                  "repnz; scasb\n\t"
                  "jnz 1f\n\t"
                  "dec %%edi\n\t"
                  "1:":"=D" (addr), "=c" (size):"0" (addr), "1" (size),
                  "a" (c));
  return addr;
}
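
/* Portable sketch of the contract implemented above: return a pointer
 * to the first occurrence of c, or one past the end of the area when c
 * does not occur (the __memscan_sketch name is hypothetical, for
 * illustration only). */
extern inline void *__memscan_sketch (void *addr, int c, size_t size);
extern inline void *
__memscan_sketch (void *addr, int c, size_t size)
{
  unsigned char *p = (unsigned char *) addr;
  while (size-- && *p != (unsigned char) c)
    p++;
  return p;                     /* the match, or one past the area */
}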

#define memset_long(x,y,z) __memset_long(x,y,z)

extern inline void *__memset_long (void *s, char c, size_t count);
extern inline void *
__memset_long (void *s, char c, size_t count)
{
  register unsigned int fill = c;
  register void *tmp = (void *) s;
  if (count >= 7)
    {
      register int c = (-(int) s) & 3;
/*__asm__ __volatile__ ("movb %b0,%h0":"=r"(fill):"r"(fill));*/
      fill |= fill << 8;
      count -= c;
      fill |= fill << 16;
      __asm__ __volatile__ ("rep\n\tstosb":"=c" (c), "=D" (tmp):"c" (c),
                      "D" (tmp), "a" (fill):"memory");
      c = count >> 2;
      __asm__ __volatile__ ("rep\n\tstosl":"=c" (c), "=D" (tmp):"c" (c),
                      "D" (tmp), "a" (fill):"memory");
      count &= 3;
    }
  __asm__ __volatile__ ("rep\n\tstosb":"=c" (count), "=D" (tmp):"c" (count),
                  "D" (tmp), "a" ((char) fill):"memory");
  return s;
}
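
/* Portable sketch of __memset_long's strategy (illustration only; the
 * __memset_long_sketch name is made up): replicate the fill byte into a
 * 32-bit word, store single bytes until the destination is aligned,
 * blast the bulk with 32-bit stores, then finish the 0-3 byte tail. */
extern inline void *__memset_long_sketch (void *s, char c, size_t count);
extern inline void *
__memset_long_sketch (void *s, char c, size_t count)
{
  char *d = (char *) s;
  unsigned long fill = (unsigned char) c;
  fill |= fill << 8;
  fill |= fill << 16;           /* fill byte in all four byte lanes */
  if (count >= 7)
    {
      size_t head = (-(size_t) d) & 3;
      size_t words;
      count -= head;
      while (head--)
        *d++ = (char) fill;     /* align the destination */
      for (words = count >> 2; words--; d += 4)
        *(unsigned long *) d = fill;  /* 32-bit stores */
      count &= 3;
    }
  while (count--)
    *d++ = (char) fill;         /* trailing bytes */
  return s;
}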
#endif /* __OPTIMIZE__ && __GNUC__ && __i386__ */
#endif /* _I386_STRING_I486_H_ */
#endif /* 0 */
