Updated from GMP 1.906.7
[kopensolaris-gnu/glibc.git] / sysdeps / i386 / i586 / memcopy.h
1 /* memcopy.h -- definitions for memory copy functions.  Pentium version.
2    Copyright (C) 1994 Free Software Foundation, Inc.
3    Contributed by Torbjorn Granlund (tege@sics.se).
4
5 This file is part of the GNU C Library.
6
7 The GNU C Library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Library General Public License as
9 published by the Free Software Foundation; either version 2 of the
10 License, or (at your option) any later version.
11
12 The GNU C Library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 Library General Public License for more details.
16
17 You should have received a copy of the GNU Library General Public
18 License along with the GNU C Library; see the file COPYING.LIB.  If
19 not, write to the Free Software Foundation, Inc., 675 Mass Ave,
20 Cambridge, MA 02139, USA.  */
21
22 /* Get the i386 definitions.  We will override some of them below.  */
23 #include_next <memcopy.h>
24
25
26 /* Written like this, the Pentium pipeline can execute the loop at a
27    sustained rate of 2 instructions/clock, or asymptotically 480
28    Mbytes/second at 60 MHz.  */
29
/* WORD_COPY_FWD (dst_bp, src_bp, nbytes_left, nbytes)

   Copy forward from the address held in SRC_BP to the address held in
   DST_BP, 32 bytes per loop iteration (eight 32-bit words moved through
   %eax and %edx), for as long as at least 32 bytes of NBYTES remain.

   DST_BP, SRC_BP and NBYTES_LEFT must be lvalues that fit in a general
   register.  On exit DST_BP and SRC_BP have been advanced past the bytes
   copied and NBYTES_LEFT holds the count of bytes that were not copied
   (the remainder below 32).

   The count is tested with signed branches (js/jns), so NBYTES must stay
   well below 2^31.

   Clobbers: "memory" because the asm reads and writes memory that is not
   described by any operand (without it the compiler may keep stale copies
   of the buffers in registers or reorder accesses across the asm); "cc"
   because subl/addl modify the flags.  The keyword is spelled __asm__ so
   the macro also expands when the compiler is in a strict ISO C mode.  */
#undef  WORD_COPY_FWD
#define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes)                    \
  do                                                                          \
    {                                                                         \
      __asm__ __volatile__ (                                                  \
        /* Bias the count by -32; skip the loop if fewer than 32 bytes.  */   \
        "subl    $32,%2\n"                                                    \
        "js      2f\n"                                                        \
        "1:\n"                                                                \
        /* Bytes 0..7.  */                                                    \
        "movl    0(%1),%%eax\n"                                               \
        "movl    4(%1),%%edx\n"                                               \
        "movl    %%eax,0(%0)\n"                                               \
        "movl    %%edx,4(%0)\n"                                               \
        /* Bytes 8..15.  */                                                   \
        "movl    8(%1),%%eax\n"                                               \
        "movl    12(%1),%%edx\n"                                              \
        "movl    %%eax,8(%0)\n"                                               \
        "movl    %%edx,12(%0)\n"                                              \
        /* Bytes 16..23.  */                                                  \
        "movl    16(%1),%%eax\n"                                              \
        "movl    20(%1),%%edx\n"                                              \
        "movl    %%eax,16(%0)\n"                                              \
        "movl    %%edx,20(%0)\n"                                              \
        /* Bytes 24..31.  */                                                  \
        "movl    24(%1),%%eax\n"                                              \
        "movl    28(%1),%%edx\n"                                              \
        "movl    %%eax,24(%0)\n"                                              \
        "movl    %%edx,28(%0)\n"                                              \
        /* Advance both pointers and loop while the biased count is >= 0.  */ \
        "addl    $32,%1\n"                                                    \
        "addl    $32,%0\n"                                                    \
        "subl    $32,%2\n"                                                    \
        "jns     1b\n"                                                        \
        /* Undo the bias so %2 is the true number of bytes left.  */          \
        "2: addl    $32,%2"                                                   \
        : "=r" (dst_bp), "=r" (src_bp), "=r" (nbytes_left)                    \
        : "0" (dst_bp), "1" (src_bp), "2" (nbytes)                            \
        : "ax", "dx", "cc", "memory");                                        \
    }                                                                         \
  while (0)
62
/* WORD_COPY_BWD (dst_ep, src_ep, nbytes_left, nbytes)

   Copy backward: DST_EP and SRC_EP hold the addresses just past the END
   of the destination and source regions.  Each loop iteration copies the
   32 bytes immediately below those addresses (eight 32-bit words moved
   through %eax and %edx) and then lowers both by 32, for as long as at
   least 32 bytes of NBYTES remain.

   DST_EP, SRC_EP and NBYTES_LEFT must be lvalues that fit in a general
   register.  On exit DST_EP and SRC_EP have been moved down past the
   bytes copied and NBYTES_LEFT holds the count of bytes that were not
   copied (the remainder below 32).

   The count is tested with signed branches (js/jns), so NBYTES must stay
   well below 2^31.

   The asm operands are the macro's own DST_EP/SRC_EP arguments, so the
   macro does not depend on the names of variables at the call site.

   Clobbers: "memory" because the asm reads and writes memory that is not
   described by any operand; "cc" because subl/addl modify the flags.
   The keyword is spelled __asm__ so the macro also expands when the
   compiler is in a strict ISO C mode.  */
#undef  WORD_COPY_BWD
#define WORD_COPY_BWD(dst_ep, src_ep, nbytes_left, nbytes)                    \
  do                                                                          \
    {                                                                         \
      __asm__ __volatile__ (                                                  \
        /* Bias the count by -32; skip the loop if fewer than 32 bytes.  */   \
        "subl    $32,%2\n"                                                    \
        "js      2f\n"                                                        \
        "1:\n"                                                                \
        /* Bytes -1..-8 relative to the end pointers.  */                     \
        "movl    -4(%1),%%eax\n"                                              \
        "movl    -8(%1),%%edx\n"                                              \
        "movl    %%eax,-4(%0)\n"                                              \
        "movl    %%edx,-8(%0)\n"                                              \
        /* Bytes -9..-16.  */                                                 \
        "movl    -12(%1),%%eax\n"                                             \
        "movl    -16(%1),%%edx\n"                                             \
        "movl    %%eax,-12(%0)\n"                                             \
        "movl    %%edx,-16(%0)\n"                                             \
        /* Bytes -17..-24.  */                                                \
        "movl    -20(%1),%%eax\n"                                             \
        "movl    -24(%1),%%edx\n"                                             \
        "movl    %%eax,-20(%0)\n"                                             \
        "movl    %%edx,-24(%0)\n"                                             \
        /* Bytes -25..-32.  */                                                \
        "movl    -28(%1),%%eax\n"                                             \
        "movl    -32(%1),%%edx\n"                                             \
        "movl    %%eax,-28(%0)\n"                                             \
        "movl    %%edx,-32(%0)\n"                                             \
        /* Step both pointers down and loop while the biased count >= 0.  */  \
        "subl    $32,%1\n"                                                    \
        "subl    $32,%0\n"                                                    \
        "subl    $32,%2\n"                                                    \
        "jns     1b\n"                                                        \
        /* Undo the bias so %2 is the true number of bytes left.  */          \
        "2: addl    $32,%2"                                                   \
        : "=r" (dst_ep), "=r" (src_ep), "=r" (nbytes_left)                    \
        : "0" (dst_ep), "1" (src_ep), "2" (nbytes)                            \
        : "ax", "dx", "cc", "memory");                                        \
    }                                                                         \
  while (0)