GEOS  3.8.0dev
ttmathuint_x86.h
Go to the documentation of this file.
1 /*
2  * This file is a part of TTMath Bignum Library
3  * and is distributed under the 3-Clause BSD Licence.
4  * Author: Tomasz Sowa <t.sowa@ttmath.org>
5  */
6 
7 /*
8  * Copyright (c) 2006-2009, Tomasz Sowa
9  * All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions are met:
13  *
14  * * Redistributions of source code must retain the above copyright notice,
15  * this list of conditions and the following disclaimer.
16  *
17  * * Redistributions in binary form must reproduce the above copyright
18  * notice, this list of conditions and the following disclaimer in the
19  * documentation and/or other materials provided with the distribution.
20  *
21  * * Neither the name Tomasz Sowa nor the names of contributors to this
22  * project may be used to endorse or promote products derived
23  * from this software without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
35  * THE POSSIBILITY OF SUCH DAMAGE.
36  */
37 
38 #ifndef headerfilettmathuint_x86
39 #define headerfilettmathuint_x86
40 
41 
50 #ifndef TTMATH_NOASM
51 #ifdef TTMATH_PLATFORM32
52 
53 
54 
55 
56 
60 namespace ttmath
61 {
62 
74  template<uint value_size>
75  const char * UInt<value_size>::LibTypeStr()
76  {
77  #ifndef __GNUC__
78  static const char info[] = "asm_vc_32";
79  #endif
80 
81  #ifdef __GNUC__
82  static const char info[] = "asm_gcc_32";
83  #endif
84 
85  return info;
86  }
87 
88 
92  template<uint value_size>
93  LibTypeCode UInt<value_size>::LibType()
94  {
95  #ifndef __GNUC__
96  LibTypeCode info = asm_vc_32;
97  #endif
98 
99  #ifdef __GNUC__
100  LibTypeCode info = asm_gcc_32;
101  #endif
102 
103  return info;
104  }
105 
106 
107 
/*
 * Adds ss2 to this object in place, one 32-bit word at a time, with an
 * incoming carry.
 *
 *   ss2     - the value to add (its table is only read by the asm)
 *   c       - incoming carry; any non-zero value is treated as 1
 *   returns - the carry out of the most significant word (0 or 1)
 *
 * Both compiler branches run the same loop over all value_size words,
 * lowest word first:
 *     table[i] = table[i] + ss2.table[i] + CF
 * The loop is bottom-tested, so it always executes at least once.
 */
122  template<uint value_size>
123  uint UInt<value_size>::Add(const UInt<value_size> & ss2, uint c)
124  {
125  uint b = value_size;
126  uint * p1 = table;
127  uint * p2 = const_cast<uint*>(ss2.table);
128 
129  // we don't have to use TTMATH_REFERENCE_ASSERT here
130  // this algorithm doesn't require it
131 
132  #ifndef __GNUC__
133 
134  // this part might be compiled with for example visual c
135 
136  __asm
137  {
138  push eax
139  push ebx
140  push ecx
141  push edx
142  push esi
143 
144  mov ecx,[b] // ecx = number of words left to process
145 
146  mov ebx,[p1] // ebx = destination table (this)
147  mov esi,[p2] // esi = source table (ss2)
148 
149  xor edx,edx // edx=0 (word index)
150  mov eax,[c]
151  neg eax // CF=1 if eax!=0 , CF=0 if eax==0
152 
153  ttmath_loop:
154  mov eax,[esi+edx*4]
155  adc [ebx+edx*4],eax // table[edx] += ss2.table[edx] + CF
156 
157  inc edx // inc/dec leave CF untouched, so the carry reaches the next adc
158  dec ecx
159  jnz ttmath_loop
160 
161  adc ecx, ecx // ecx is 0 here, so ecx = CF (the carry out)
162  mov [c], ecx
163 
164  pop esi
165  pop edx
166  pop ecx
167  pop ebx
168  pop eax
169  }
170 
171 
172 
173  #endif
174 
175 
176  #ifdef __GNUC__
177  uint dummy, dummy2;
178  // this part should be compiled with gcc
179 
180  __asm__ __volatile__(
181 
182  "xorl %%edx, %%edx \n" // edx = 0 (word index)
183  "negl %%eax \n" // CF=1 if eax!=0 , CF=0 if eax==0
184 
185  "1: \n"
186  "movl (%%esi,%%edx,4), %%eax \n"
187  "adcl %%eax, (%%ebx,%%edx,4) \n" // table[edx] += ss2.table[edx] + CF
188 
189  "incl %%edx \n" // inc/dec leave CF untouched
190  "decl %%ecx \n"
191  "jnz 1b \n"
192 
193  "adc %%ecx, %%ecx \n" // ecx is 0 here, so ecx = CF (the carry out)
194 
     // outputs: ecx -> c (carry out); eax and edx are scratch (dummy, dummy2)
195  : "=c" (c), "=a" (dummy), "=d" (dummy2)
     // inputs: ecx = b (word count), eax = c (carry in), ebx = p1, esi = p2
196  : "0" (b), "1" (c), "b" (p1), "S" (p2)
197  : "cc", "memory" );
198  #endif
199 
     // NOTE(review): TTMATH_LOGC is defined elsewhere; presumably a debug-logging hook - confirm
200  TTMATH_LOGC("UInt::Add", c)
201 
202  return c;
203  }
204 
205 
206 
/*
 * Adds a single word to this object at a given word position and
 * propagates the carry into the higher words.
 *
 *   value   - the word added to table[index]
 *   index   - word position; the TTMATH_ASSERT line states index < value_size
 *   returns - 1 if a carry propagated out of the last word, otherwise 0
 *
 * The loop stops as soon as an addition produces no carry; after the first
 * addition only the constant 1 (the carry) is added to each following word.
 */
231  template<uint value_size>
232  uint UInt<value_size>::AddInt(uint value, uint index)
233  {
234  uint b = value_size;
235  uint * p1 = table;
236  uint c;
237 
238  TTMATH_ASSERT( index < value_size )
239 
240  #ifndef __GNUC__
241 
242  __asm
243  {
244  push eax
245  push ebx
246  push ecx
247  push edx
248 
249  mov ecx, [b]
250  sub ecx, [index] // ecx = value_size - index = words from index to the end
251 
252  mov edx, [index] // edx = current word index
253  mov ebx, [p1]
254 
255  mov eax, [value]
256 
257  ttmath_loop:
258  add [ebx+edx*4], eax
259  jnc ttmath_end // no carry: nothing more to propagate
260 
261  mov eax, 1 // from now on only the carry is added (mov does not change CF)
262  inc edx
263  dec ecx
264  jnz ttmath_loop
265 
266  ttmath_end:
267  setc al // CF is 0 after jnc, or still 1 if the loop ran off the end
268  movzx edx, al
269  mov [c], edx
270 
271  pop edx
272  pop ecx
273  pop ebx
274  pop eax
275  }
276 
277  #endif
278 
279 
280  #ifdef __GNUC__
281  uint dummy, dummy2;
282 
283  __asm__ __volatile__(
284 
285  "subl %%edx, %%ecx \n" // ecx = value_size - index
286 
287  "1: \n"
288  "addl %%eax, (%%ebx,%%edx,4) \n"
289  "jnc 2f \n" // no carry: done
290 
291  "movl $1, %%eax \n" // from now on only the carry is added
292  "incl %%edx \n"
293  "decl %%ecx \n"
294  "jnz 1b \n"
295 
296  "2: \n"
297  "setc %%al \n"
298  "movzx %%al, %%edx \n" // edx = final carry (0 or 1)
299 
     // outputs: edx -> c; eax and ecx are scratch (dummy, dummy2)
300  : "=d" (c), "=a" (dummy), "=c" (dummy2)
     // inputs: edx = index, eax = value, ecx = b, ebx = p1
301  : "0" (index), "1" (value), "2" (b), "b" (p1)
302  : "cc", "memory" );
303 
304  #endif
305 
     // NOTE(review): TTMATH_LOGC is defined elsewhere; presumably a debug-logging hook - confirm
306  TTMATH_LOGC("UInt::AddInt", c)
307 
308  return c;
309  }
310 
311 
312 
313 
/*
 * Adds a two-word value to this object at a given word position and
 * propagates the carry into the higher words.
 *
 *   x2      - the high word, added (with carry) to table[index+1]
 *   x1      - the low word, added to table[index]
 *   index   - word position; the TTMATH_ASSERT line states
 *             index < value_size - 1
 *   returns - 1 if a carry propagated out of the last word, otherwise 0
 *
 * After x2 has been added, only the carry (adc with a zero operand) is
 * propagated, and the loop stops as soon as no carry is produced.
 */
350  template<uint value_size>
351  uint UInt<value_size>::AddTwoInts(uint x2, uint x1, uint index)
352  {
353  uint b = value_size;
354  uint * p1 = table;
355  uint c;
356 
357  TTMATH_ASSERT( index < value_size - 1 )
358 
359  #ifndef __GNUC__
360  __asm
361  {
362  push eax
363  push ebx
364  push ecx
365  push edx
366 
367  mov ecx, [b]
368  sub ecx, [index] // ecx = value_size - index
369 
370  mov ebx, [p1]
371  mov edx, [index]
372 
373  mov eax, [x1]
374  add [ebx+edx*4], eax // table[index] += x1, CF = carry
375  inc edx // inc/dec/mov leave CF untouched
376  dec ecx
377 
378  mov eax, [x2]
379 
380  ttmath_loop:
381  adc [ebx+edx*4], eax // first pass adds x2 + CF, later passes add 0 + CF
382  jnc ttmath_end
383 
384  mov eax, 0 // mov (unlike xor) does not modify CF
385  inc edx
386  dec ecx
387  jnz ttmath_loop
388 
389  ttmath_end:
390  setc al
391  movzx edx, al
392  mov [c], edx
393 
394  pop edx
395  pop ecx
396  pop ebx
397  pop eax
398 
399  }
400  #endif
401 
402 
403  #ifdef __GNUC__
404  uint dummy, dummy2;
405 
406  __asm__ __volatile__(
407 
408  "subl %%edx, %%ecx \n" // ecx = value_size - index
409 
410  "addl %%esi, (%%ebx,%%edx,4) \n" // table[index] += x1, CF = carry
411  "incl %%edx \n"
412  "decl %%ecx \n"
413 
414  "1: \n"
415  "adcl %%eax, (%%ebx,%%edx,4) \n" // first pass adds x2 + CF, later passes add 0 + CF
416  "jnc 2f \n"
417 
418  "mov $0, %%eax \n" // mov (unlike xor) does not modify CF
419  "incl %%edx \n"
420  "decl %%ecx \n"
421  "jnz 1b \n"
422 
423  "2: \n"
424  "setc %%al \n"
425  "movzx %%al, %%eax \n" // eax = final carry (0 or 1)
426 
     // outputs: eax -> c; ecx and edx are scratch (dummy, dummy2)
427  : "=a" (c), "=c" (dummy), "=d" (dummy2)
     // inputs: eax = x2, ecx = b, edx = index, ebx = p1, esi = x1
428  : "0" (x2), "1" (b), "2" (index), "b" (p1), "S" (x1)
429  : "cc", "memory" );
430 
431  #endif
432 
     // NOTE(review): TTMATH_LOGC is defined elsewhere; presumably a debug-logging hook - confirm
433  TTMATH_LOGC("UInt::AddTwoInts", c)
434 
435  return c;
436  }
437 
438 
439 
/*
 * Adds two word arrays of possibly different lengths and stores the sum in
 * a third array.
 *
 *   ss1, ss1_size - first operand and its length in words
 *   ss2, ss2_size - second operand and its length in words; the
 *                   TTMATH_ASSERT line states ss1_size >= ss2_size
 *   result        - destination; ss1_size words are written
 *   returns       - the carry out of the last word (0 or 1)
 *
 * Stage 1: result[i] = ss1[i] + ss2[i] + CF   for the first ss2_size words.
 * Stage 2: result[i] = ss1[i] + CF            for the remaining
 *          rest = ss1_size - ss2_size words (skipped when rest == 0).
 *
 * NOTE(review): the stage-1 loop is bottom-tested (dec ecx / jnz), so it
 * assumes ss2_size != 0; with ss2_size == 0 the counter would wrap around.
 * This precondition is not asserted here.
 */
460  template<uint value_size>
461  uint UInt<value_size>::AddVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
462  {
463  TTMATH_ASSERT( ss1_size >= ss2_size )
464 
465  uint rest = ss1_size - ss2_size;
466  uint c;
467 
468  #ifndef __GNUC__
469 
470  // this part might be compiled with for example visual c
471  __asm
472  {
473  pushad
474 
475  mov ecx, [ss2_size]
476  xor edx, edx // edx = 0, cf = 0
477 
478  mov esi, [ss1]
479  mov ebx, [ss2]
480  mov edi, [result]
481 
482  ttmath_loop:
483  mov eax, [esi+edx*4]
484  adc eax, [ebx+edx*4]
485  mov [edi+edx*4], eax // result[edx] = ss1[edx] + ss2[edx] + CF
486 
487  inc edx // inc/dec leave CF untouched
488  dec ecx
489  jnz ttmath_loop
490 
491  adc ecx, ecx // ecx has the cf state
492 
493  mov ebx, [rest]
494  or ebx, ebx
495  jz ttmath_end // no extra words in ss1: ecx already holds the result
496 
497  xor ebx, ebx // ebx = 0
498  neg ecx // setting cf from ecx
499  mov ecx, [rest] // ecx is != 0
500 
501  ttmath_loop2:
502  mov eax, [esi+edx*4]
503  adc eax, ebx
504  mov [edi+edx*4], eax // result[edx] = ss1[edx] + CF
505 
506  inc edx
507  dec ecx
508  jnz ttmath_loop2
509 
510  adc ecx, ecx // ecx is 0 here, so ecx = CF (the carry out)
511 
512  ttmath_end:
513  mov [c], ecx
514 
515  popad
516  }
517 
518  #endif
519 
520 
521  #ifdef __GNUC__
522 
523  // this part should be compiled with gcc
524  uint dummy1, dummy2, dummy3;
525 
526  __asm__ __volatile__(
527  "push %%edx \n" // edx holds 'rest' on entry; saved because edx becomes the index
528  "xor %%edx, %%edx \n" // edx = 0, cf = 0
529  "1: \n"
530  "mov (%%esi,%%edx,4), %%eax \n"
531  "adc (%%ebx,%%edx,4), %%eax \n"
532  "mov %%eax, (%%edi,%%edx,4) \n" // result[edx] = ss1[edx] + ss2[edx] + CF
533 
534  "inc %%edx \n"
535  "dec %%ecx \n"
536  "jnz 1b \n"
537 
538  "adc %%ecx, %%ecx \n" // ecx has the cf state
539  "pop %%eax \n" // eax = rest
540 
541  "or %%eax, %%eax \n"
542  "jz 3f \n" // no extra words in ss1: ecx already holds the result
543 
544  "xor %%ebx, %%ebx \n" // ebx = 0
545  "neg %%ecx \n" // setting cf from ecx
546  "mov %%eax, %%ecx \n" // ecx=rest and is != 0
547  "2: \n"
548  "mov (%%esi, %%edx, 4), %%eax \n"
549  "adc %%ebx, %%eax \n"
550  "mov %%eax, (%%edi, %%edx, 4) \n" // result[edx] = ss1[edx] + CF
551 
552  "inc %%edx \n"
553  "dec %%ecx \n"
554  "jnz 2b \n"
555 
556  "adc %%ecx, %%ecx \n" // ecx is 0 here, so ecx = CF (the carry out)
557  "3: \n"
558 
     // outputs: ecx -> c; eax, ebx and edx are scratch (dummy1, dummy2, dummy3)
559  : "=a" (dummy1), "=b" (dummy2), "=c" (c), "=d" (dummy3)
     // inputs: ebx = ss2, ecx = ss2_size, edx = rest, esi = ss1, edi = result
560  : "1" (ss2), "2" (ss2_size), "3" (rest), "S" (ss1), "D" (result)
561  : "cc", "memory" );
562 
563  #endif
564 
     // NOTE(review): TTMATH_VECTOR_LOGC is defined elsewhere; presumably a debug-logging hook - confirm
565  TTMATH_VECTOR_LOGC("UInt::AddVector", c, result, ss1_size)
566 
567  return c;
568  }
569 
570 
/*
 * Subtracts ss2 from this object in place, one 32-bit word at a time, with
 * an incoming borrow.
 *
 *   ss2     - the value to subtract (its table is only read by the asm)
 *   c       - incoming borrow; any non-zero value is treated as 1
 *   returns - the borrow out of the most significant word (0 or 1)
 *
 * Both compiler branches run the same loop over all value_size words,
 * lowest word first:
 *     table[i] = table[i] - ss2.table[i] - CF
 * The loop is bottom-tested, so it always executes at least once.
 */
579  template<uint value_size>
580  uint UInt<value_size>::Sub(const UInt<value_size> & ss2, uint c)
581  {
582  uint b = value_size;
583  uint * p1 = table;
584  uint * p2 = const_cast<uint*>(ss2.table);
585 
586  // we don't have to use TTMATH_REFERENCE_ASSERT here
587  // this algorithm doesn't require it
588 
589  #ifndef __GNUC__
590 
591  __asm
592  {
593  push eax
594  push ebx
595  push ecx
596  push edx
597  push esi
598 
599  mov ecx,[b] // ecx = number of words left to process
600 
601  mov ebx,[p1] // ebx = destination table (this)
602  mov esi,[p2] // esi = source table (ss2)
603 
604  xor edx,edx // edx=0 (word index)
605  mov eax,[c]
606  neg eax // CF=1 if eax!=0 , CF=0 if eax==0
607 
608  ttmath_loop:
609  mov eax,[esi+edx*4]
610  sbb [ebx+edx*4],eax // table[edx] -= ss2.table[edx] + CF
611 
612  inc edx // inc/dec leave CF untouched, so the borrow reaches the next sbb
613  dec ecx
614  jnz ttmath_loop
615 
616  adc ecx, ecx // ecx is 0 here, so ecx = CF (the borrow out)
617  mov [c], ecx
618 
619  pop esi
620  pop edx
621  pop ecx
622  pop ebx
623  pop eax
624  }
625 
626  #endif
627 
628 
629  #ifdef __GNUC__
630  uint dummy, dummy2;
631 
632  __asm__ __volatile__(
633 
634  "xorl %%edx, %%edx \n" // edx = 0 (word index)
635  "negl %%eax \n" // CF=1 if eax!=0 , CF=0 if eax==0
636 
637  "1: \n"
638  "movl (%%esi,%%edx,4), %%eax \n"
639  "sbbl %%eax, (%%ebx,%%edx,4) \n" // table[edx] -= ss2.table[edx] + CF
640 
641  "incl %%edx \n" // inc/dec leave CF untouched
642  "decl %%ecx \n"
643  "jnz 1b \n"
644 
645  "adc %%ecx, %%ecx \n" // ecx is 0 here, so ecx = CF (the borrow out)
646 
     // outputs: ecx -> c (borrow out); eax and edx are scratch (dummy, dummy2)
647  : "=c" (c), "=a" (dummy), "=d" (dummy2)
     // inputs: ecx = b (word count), eax = c (borrow in), ebx = p1, esi = p2
648  : "0" (b), "1" (c), "b" (p1), "S" (p2)
649  : "cc", "memory" );
650 
651  #endif
652 
     // NOTE(review): TTMATH_LOGC is defined elsewhere; presumably a debug-logging hook - confirm
653  TTMATH_LOGC("UInt::Sub", c)
654 
655  return c;
656  }
657 
658 
659 
660 
/*
 * Subtracts a single word from this object at a given word position and
 * propagates the borrow into the higher words.
 *
 *   value   - the word subtracted from table[index]
 *   index   - word position; the TTMATH_ASSERT line states index < value_size
 *   returns - 1 if a borrow propagated out of the last word, otherwise 0
 *
 * The loop stops as soon as a subtraction produces no borrow; after the
 * first subtraction only the constant 1 (the borrow) is subtracted from
 * each following word.
 */
685  template<uint value_size>
686  uint UInt<value_size>::SubInt(uint value, uint index)
687  {
688  uint b = value_size;
689  uint * p1 = table;
690  uint c;
691 
692  TTMATH_ASSERT( index < value_size )
693 
694  #ifndef __GNUC__
695 
696  __asm
697  {
698  push eax
699  push ebx
700  push ecx
701  push edx
702 
703  mov ecx, [b]
704  sub ecx, [index] // ecx = value_size - index = words from index to the end
705 
706  mov edx, [index] // edx = current word index
707  mov ebx, [p1]
708 
709  mov eax, [value]
710 
711  ttmath_loop:
712  sub [ebx+edx*4], eax
713  jnc ttmath_end // no borrow: nothing more to propagate
714 
715  mov eax, 1 // from now on only the borrow is subtracted (mov does not change CF)
716  inc edx
717  dec ecx
718  jnz ttmath_loop
719 
720  ttmath_end:
721  setc al // CF is 0 after jnc, or still 1 if the loop ran off the end
722  movzx edx, al
723  mov [c], edx
724 
725  pop edx
726  pop ecx
727  pop ebx
728  pop eax
729  }
730 
731  #endif
732 
733 
734  #ifdef __GNUC__
735  uint dummy, dummy2;
736 
737  __asm__ __volatile__(
738 
739  "subl %%edx, %%ecx \n" // ecx = value_size - index
740 
741  "1: \n"
742  "subl %%eax, (%%ebx,%%edx,4) \n"
743  "jnc 2f \n" // no borrow: done
744 
745  "movl $1, %%eax \n" // from now on only the borrow is subtracted
746  "incl %%edx \n"
747  "decl %%ecx \n"
748  "jnz 1b \n"
749 
750  "2: \n"
751  "setc %%al \n"
752  "movzx %%al, %%edx \n" // edx = final borrow (0 or 1)
753 
     // outputs: edx -> c; eax and ecx are scratch (dummy, dummy2)
754  : "=d" (c), "=a" (dummy), "=c" (dummy2)
     // inputs: edx = index, eax = value, ecx = b, ebx = p1
755  : "0" (index), "1" (value), "2" (b), "b" (p1)
756  : "cc", "memory" );
757 
758  #endif
759 
     // NOTE(review): TTMATH_LOGC is defined elsewhere; presumably a debug-logging hook - confirm
760  TTMATH_LOGC("UInt::SubInt", c)
761 
762  return c;
763  }
764 
765 
766 
/*
 * Subtracts one word array from another of possibly greater length and
 * stores the difference in a third array.
 *
 *   ss1, ss1_size - minuend and its length in words
 *   ss2, ss2_size - subtrahend and its length in words; the
 *                   TTMATH_ASSERT line states ss1_size >= ss2_size
 *   result        - destination; ss1_size words are written
 *   returns       - the borrow out of the last word (0 or 1)
 *
 * Stage 1: result[i] = ss1[i] - ss2[i] - CF   for the first ss2_size words.
 * Stage 2: result[i] = ss1[i] - CF            for the remaining
 *          rest = ss1_size - ss2_size words (skipped when rest == 0).
 *
 * NOTE(review): the stage-1 loop is bottom-tested (dec ecx / jnz), so it
 * assumes ss2_size != 0; with ss2_size == 0 the counter would wrap around.
 * This precondition is not asserted here.
 */
788  template<uint value_size>
789  uint UInt<value_size>::SubVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
790  {
791  TTMATH_ASSERT( ss1_size >= ss2_size )
792 
793  uint rest = ss1_size - ss2_size;
794  uint c;
795 
796  #ifndef __GNUC__
797 
798  // this part might be compiled with for example visual c
799 
800  /*
801  the asm code is nearly the same as in AddVector
802  only two instructions 'adc' are changed to 'sbb'
803  */
804  __asm
805  {
806  pushad
807 
808  mov ecx, [ss2_size]
809  xor edx, edx // edx = 0, cf = 0
810 
811  mov esi, [ss1]
812  mov ebx, [ss2]
813  mov edi, [result]
814 
815  ttmath_loop:
816  mov eax, [esi+edx*4]
817  sbb eax, [ebx+edx*4]
818  mov [edi+edx*4], eax // result[edx] = ss1[edx] - ss2[edx] - CF
819 
820  inc edx // inc/dec leave CF untouched
821  dec ecx
822  jnz ttmath_loop
823 
824  adc ecx, ecx // ecx has the cf state
825 
826  mov ebx, [rest]
827  or ebx, ebx
828  jz ttmath_end // no extra words in ss1: ecx already holds the result
829 
830  xor ebx, ebx // ebx = 0
831  neg ecx // setting cf from ecx
832  mov ecx, [rest] // ecx is != 0
833 
834  ttmath_loop2:
835  mov eax, [esi+edx*4]
836  sbb eax, ebx
837  mov [edi+edx*4], eax // result[edx] = ss1[edx] - CF
838 
839  inc edx
840  dec ecx
841  jnz ttmath_loop2
842 
843  adc ecx, ecx // ecx is 0 here, so ecx = CF (the borrow out)
844 
845  ttmath_end:
846  mov [c], ecx
847 
848  popad
849  }
850 
851  #endif
852 
853 
854  #ifdef __GNUC__
855 
856  // this part should be compiled with gcc
857  uint dummy1, dummy2, dummy3;
858 
859  __asm__ __volatile__(
860  "push %%edx \n" // edx holds 'rest' on entry; saved because edx becomes the index
861  "xor %%edx, %%edx \n" // edx = 0, cf = 0
862  "1: \n"
863  "mov (%%esi,%%edx,4), %%eax \n"
864  "sbb (%%ebx,%%edx,4), %%eax \n"
865  "mov %%eax, (%%edi,%%edx,4) \n" // result[edx] = ss1[edx] - ss2[edx] - CF
866 
867  "inc %%edx \n"
868  "dec %%ecx \n"
869  "jnz 1b \n"
870 
871  "adc %%ecx, %%ecx \n" // ecx has the cf state
872  "pop %%eax \n" // eax = rest
873 
874  "or %%eax, %%eax \n"
875  "jz 3f \n" // no extra words in ss1: ecx already holds the result
876 
877  "xor %%ebx, %%ebx \n" // ebx = 0
878  "neg %%ecx \n" // setting cf from ecx
879  "mov %%eax, %%ecx \n" // ecx=rest and is != 0
880  "2: \n"
881  "mov (%%esi, %%edx, 4), %%eax \n"
882  "sbb %%ebx, %%eax \n"
883  "mov %%eax, (%%edi, %%edx, 4) \n" // result[edx] = ss1[edx] - CF
884 
885  "inc %%edx \n"
886  "dec %%ecx \n"
887  "jnz 2b \n"
888 
889  "adc %%ecx, %%ecx \n" // ecx is 0 here, so ecx = CF (the borrow out)
890  "3: \n"
891 
     // outputs: ecx -> c; eax, ebx and edx are scratch (dummy1, dummy2, dummy3)
892  : "=a" (dummy1), "=b" (dummy2), "=c" (c), "=d" (dummy3)
     // inputs: ebx = ss2, ecx = ss2_size, edx = rest, esi = ss1, edi = result
893  : "1" (ss2), "2" (ss2_size), "3" (rest), "S" (ss1), "D" (result)
894  : "cc", "memory" );
895 
896  #endif
897 
     // NOTE(review): TTMATH_VECTOR_LOGC is defined elsewhere; presumably a debug-logging hook - confirm
898  TTMATH_VECTOR_LOGC("UInt::SubVector", c, result, ss1_size)
899 
900  return c;
901  }
902 
903 
904 
/*
 * Rotates the whole table left by one bit through the carry flag
 * (i.e. shifts it one bit towards the more significant words).
 *
 *   c       - the bit moved into bit 0 of table[0]; any non-zero value is
 *             treated as 1
 *   returns - the bit moved out of the top bit of the last word (0 or 1)
 *
 * Words are processed lowest first; each 'rcl' passes its outgoing top bit
 * to the next word through CF.
 */
917  template<uint value_size>
918  uint UInt<value_size>::Rcl2_one(uint c)
919  {
920  uint b = value_size;
921  uint * p1 = table;
922 
923  #ifndef __GNUC__
924  __asm
925  {
926  push ebx
927  push ecx
928  push edx
929 
930  mov ebx, [p1]
931  xor edx, edx // edx = 0 (word index)
932  mov ecx, [c]
933  neg ecx // CF=1 if c!=0 , CF=0 if c==0
934  mov ecx, [b] // ecx = word counter (mov does not change CF)
935 
936  ttmath_loop:
937  rcl dword ptr [ebx+edx*4], 1
938 
939  inc edx // inc/dec leave CF untouched
940  dec ecx
941  jnz ttmath_loop
942 
943  adc ecx, ecx // ecx is 0 here, so ecx = CF (the bit moved out)
944  mov [c], ecx
945 
946  pop edx
947  pop ecx
948  pop ebx
949  }
950  #endif
951 
952 
953  #ifdef __GNUC__
954  uint dummy, dummy2;
955 
956  __asm__ __volatile__(
957 
958  "xorl %%edx, %%edx \n" // edx=0
959  "negl %%eax \n" // CF=1 if eax!=0 , CF=0 if eax==0
960 
961  "1: \n"
962  "rcll $1, (%%ebx, %%edx, 4) \n"
963 
964  "incl %%edx \n"
965  "decl %%ecx \n"
966  "jnz 1b \n"
967 
968  "adcl %%ecx, %%ecx \n" // ecx is 0 here, so ecx = CF (the bit moved out)
969 
     // outputs: ecx -> c; eax and edx are scratch (dummy, dummy2)
970  : "=c" (c), "=a" (dummy), "=d" (dummy2)
     // inputs: ecx = b (word count), eax = c (incoming bit), ebx = p1
971  : "0" (b), "1" (c), "b" (p1)
972  : "cc", "memory" );
973 
974  #endif
975 
     // NOTE(review): TTMATH_LOGC is defined elsewhere; presumably a debug-logging hook - confirm
976  TTMATH_LOGC("UInt::Rcl2_one", c)
977 
978  return c;
979  }
980 
981 
982 
/*
 * Rotates the whole table right by one bit through the carry flag
 * (i.e. shifts it one bit towards the less significant words).
 *
 *   c       - the bit moved into the top bit of the last word; any non-zero
 *             value is treated as 1
 *   returns - the bit moved out of bit 0 of table[0] (0 or 1)
 *
 * Words are processed highest first; ecx serves both as the loop counter
 * and as the index (the addressed word is table[ecx-1]).
 */
995  template<uint value_size>
996  uint UInt<value_size>::Rcr2_one(uint c)
997  {
998  uint b = value_size;
999  uint * p1 = table;
1000 
1001  #ifndef __GNUC__
1002  __asm
1003  {
1004  push ebx
1005  push ecx
1006 
1007  mov ebx, [p1]
1008  mov ecx, [c]
1009  neg ecx // CF=1 if c!=0 , CF=0 if c==0
1010  mov ecx, [b] // ecx = word counter (mov does not change CF)
1011 
1012  ttmath_loop:
1013  rcr dword ptr [ebx+ecx*4-4], 1 // rotates table[ecx-1]
1014 
1015  dec ecx // dec leaves CF untouched
1016  jnz ttmath_loop
1017 
1018  adc ecx, ecx // ecx is 0 here, so ecx = CF (the bit moved out)
1019  mov [c], ecx
1020 
1021  pop ecx
1022  pop ebx
1023  }
1024  #endif
1025 
1026 
1027  #ifdef __GNUC__
1028  uint dummy;
1029 
1030  __asm__ __volatile__(
1031 
1032  "negl %%eax \n" // CF=1 if eax!=0 , CF=0 if eax==0
1033 
1034  "1: \n"
1035  "rcrl $1, -4(%%ebx, %%ecx, 4) \n" // rotates table[ecx-1]
1036 
1037  "decl %%ecx \n"
1038  "jnz 1b \n"
1039 
1040  "adcl %%ecx, %%ecx \n" // ecx is 0 here, so ecx = CF (the bit moved out)
1041 
     // outputs: ecx -> c; eax is scratch (dummy)
1042  : "=c" (c), "=a" (dummy)
     // inputs: ecx = b (word count), eax = c (incoming bit), ebx = p1
1043  : "0" (b), "1" (c), "b" (p1)
1044  : "cc", "memory" );
1045 
1046  #endif
1047 
     // NOTE(review): TTMATH_LOGC is defined elsewhere; presumably a debug-logging hook - confirm
1048  TTMATH_LOGC("UInt::Rcr2_one", c)
1049 
1050  return c;
1051  }
1052 
1053 
1054 
1055 #ifdef _MSC_VER
1056 #pragma warning (disable : 4731)
1057 //warning C4731: frame pointer register 'ebp' modified by inline assembly code
1058 #endif
1059 
1060 
1061 
/*
 * Shifts the whole table left by 'bits' bit positions (towards the more
 * significant words).
 *
 *   bits    - shift distance; the TTMATH_ASSERT line states
 *             0 < bits < TTMATH_BITS_PER_UINT
 *   c       - fill selector: if non-zero, the 'bits' vacated low bits of
 *             table[0] are set to one, otherwise to zero
 *   returns - the last bit shifted out of the most significant word (0 or 1)
 *
 * mask = 0xFFFFFFFF >> (32 - bits), i.e. the low 'bits' bits set.
 * Per word, lowest first: rotate left by 'bits'; the low 'bits' bits (which
 * came from the top of the word) become the carry for the next word and are
 * replaced by the carry taken from the previous word.
 *
 * ebp is used as a general-purpose register for the mask, so it is saved
 * and restored around the loop.
 */
1074  template<uint value_size>
1075  uint UInt<value_size>::Rcl2(uint bits, uint c)
1076  {
1077  TTMATH_ASSERT( bits>0 && bits<TTMATH_BITS_PER_UINT )
1078 
1079  uint b = value_size;
1080  uint * p1 = table;
1081 
1082  #ifndef __GNUC__
1083  __asm
1084  {
1085  push eax
1086  push ebx
1087  push ecx
1088  push edx
1089  push esi
1090  push edi
1091  push ebp
1092 
1093  mov edi, [b] // edi = word counter
1094 
1095  mov ecx, 32
1096  sub ecx, [bits]
1097  mov edx, -1
1098  shr edx, cl // edx = mask with the low 'bits' bits set
1099 
1100  mov ecx, [bits]
1101  mov ebx, [p1]
1102  mov eax, [c]
1103 
     // every variable needed below has been loaded into a register before this point
1104  mov ebp, edx // ebp = mask (modified ebp - don't read/write to variables)
1105 
1106  xor edx, edx // edx = 0
1107  mov esi, edx
1108  or eax, eax
1109  cmovnz esi, ebp // if(c) esi=mask else esi=0
1110 
1111  ttmath_loop:
1112  rol dword ptr [ebx+edx*4], cl
1113 
1114  mov eax, [ebx+edx*4]
1115  and eax, ebp // eax = bits that wrapped around from the top of this word
1116  xor [ebx+edx*4], eax // clearing bits
1117  or [ebx+edx*4], esi // saving old value
1118  mov esi, eax // carry for the next (higher) word
1119 
1120  inc edx
1121  dec edi
1122  jnz ttmath_loop
1123 
1124  pop ebp // restoring ebp
1125 
1126  and eax, 1 // bit 0 of the final carry group = last bit shifted out
1127  mov [c], eax
1128 
1129  pop edi
1130  pop esi
1131  pop edx
1132  pop ecx
1133  pop ebx
1134  pop eax
1135  }
1136  #endif
1137 
1138 
1139  #ifdef __GNUC__
1140  uint dummy, dummy2, dummy3;
1141 
1142  __asm__ __volatile__(
1143 
1144  "push %%ebp \n" // ebp is used for the mask below
1145 
1146  "movl %%ecx, %%esi \n" // esi = bits (saved while cl is used for the mask shift)
1147  "movl $32, %%ecx \n"
1148  "subl %%esi, %%ecx \n" // ecx = 32 - bits
1149  "movl $-1, %%edx \n" // edx = -1 (all bits set to one)
1150  "shrl %%cl, %%edx \n" // shifting (0 -> edx -> cf) (cl times)
1151  "movl %%edx, %%ebp \n" // ebp = edx = mask
1152  "movl %%esi, %%ecx \n" // ecx = bits again
1153 
1154  "xorl %%edx, %%edx \n" // edx = 0 (word index)
1155  "movl %%edx, %%esi \n"
1156  "orl %%eax, %%eax \n"
1157  "cmovnz %%ebp, %%esi \n" // if(c) esi=mask else esi=0
1158 
1159  "1: \n"
1160  "roll %%cl, (%%ebx,%%edx,4) \n"
1161 
1162  "movl (%%ebx,%%edx,4), %%eax \n"
1163  "andl %%ebp, %%eax \n" // eax = bits that wrapped around from the top of this word
1164  "xorl %%eax, (%%ebx,%%edx,4) \n" // clear them in the word
1165  "orl %%esi, (%%ebx,%%edx,4) \n" // insert the carry from the previous word
1166  "movl %%eax, %%esi \n" // carry for the next (higher) word
1167 
1168  "incl %%edx \n"
1169  "decl %%edi \n"
1170  "jnz 1b \n"
1171 
1172  "and $1, %%eax \n" // bit 0 of the final carry group = last bit shifted out
1173 
1174  "pop %%ebp \n"
1175 
     // outputs: eax -> c; edi, esi and edx are scratch (dummy, dummy2, dummy3)
1176  : "=a" (c), "=D" (dummy), "=S" (dummy2), "=d" (dummy3)
     // inputs: eax = c, edi = b (word count), ebx = p1, ecx = bits
1177  : "0" (c), "1" (b), "b" (p1), "c" (bits)
1178  : "cc", "memory" );
1179 
1180  #endif
1181 
     // NOTE(review): TTMATH_LOGC is defined elsewhere; presumably a debug-logging hook - confirm
1182  TTMATH_LOGC("UInt::Rcl2", c)
1183 
1184  return c;
1185  }
1186 
1187 
1188 
1189 
/*
 * Shifts the whole table right by 'bits' bit positions (towards the less
 * significant words).
 *
 *   bits    - shift distance; the TTMATH_ASSERT line states
 *             0 < bits < TTMATH_BITS_PER_UINT
 *   c       - fill selector: if non-zero, the 'bits' vacated high bits of
 *             the last word are set to one, otherwise to zero
 *   returns - the last bit shifted out of table[0] (0 or 1)
 *
 * mask = 0xFFFFFFFF << (32 - bits), i.e. the high 'bits' bits set.
 * Per word, highest first: rotate right by 'bits'; the high 'bits' bits
 * (which came from the bottom of the word) become the carry for the next
 * word and are replaced by the carry taken from the previous word.
 *
 * ebp is used as a general-purpose register for the mask, so it is saved
 * and restored around the loop.
 */
1202  template<uint value_size>
1203  uint UInt<value_size>::Rcr2(uint bits, uint c)
1204  {
1205  TTMATH_ASSERT( bits>0 && bits<TTMATH_BITS_PER_UINT )
1206 
1207  uint b = value_size;
1208  uint * p1 = table;
1209 
1210  #ifndef __GNUC__
1211  __asm
1212  {
1213  push eax
1214  push ebx
1215  push ecx
1216  push edx
1217  push esi
1218  push edi
1219  push ebp
1220 
1221  mov edi, [b] // edi = word counter
1222 
1223  mov ecx, 32
1224  sub ecx, [bits]
1225  mov edx, -1
1226  shl edx, cl // edx = mask with the high 'bits' bits set
1227 
1228  mov ecx, [bits]
1229  mov ebx, [p1]
1230  mov eax, [c]
1231 
     // every variable needed below has been loaded into a register before this point
1232  mov ebp, edx // ebp = mask (modified ebp - don't read/write to variables)
1233 
1234  xor edx, edx // edx = 0
1235  mov esi, edx
1236  add edx, edi
1237  dec edx // edx is pointing at the end of the table (on last word)
1238  or eax, eax
1239  cmovnz esi, ebp // if(c) esi=mask else esi=0
1240 
1241  ttmath_loop:
1242  ror dword ptr [ebx+edx*4], cl
1243 
1244  mov eax, [ebx+edx*4]
1245  and eax, ebp // eax = bits that wrapped around from the bottom of this word
1246  xor [ebx+edx*4], eax // clearing bits
1247  or [ebx+edx*4], esi // saving old value
1248  mov esi, eax // carry for the next (lower) word
1249 
1250  dec edx
1251  dec edi
1252  jnz ttmath_loop
1253 
1254  pop ebp // restoring ebp
1255 
1256  rol eax, 1 // 31bit will be first
1257  and eax, 1 // top bit of the final carry group = last bit shifted out
1258  mov [c], eax
1259 
1260  pop edi
1261  pop esi
1262  pop edx
1263  pop ecx
1264  pop ebx
1265  pop eax
1266  }
1267  #endif
1268 
1269 
1270  #ifdef __GNUC__
1271  uint dummy, dummy2, dummy3;
1272 
1273  __asm__ __volatile__(
1274 
1275  "push %%ebp \n" // ebp is used for the mask below
1276 
1277  "movl %%ecx, %%esi \n" // esi = bits (saved while cl is used for the mask shift)
1278  "movl $32, %%ecx \n"
1279  "subl %%esi, %%ecx \n" // ecx = 32 - bits
1280  "movl $-1, %%edx \n" // edx = -1 (all bits set to one)
1281  "shll %%cl, %%edx \n" // shifting (cf <- edx <- 0) (cl times)
1282  "movl %%edx, %%ebp \n" // ebp = edx = mask
1283  "movl %%esi, %%ecx \n" // ecx = bits again
1284 
1285  "xorl %%edx, %%edx \n"
1286  "movl %%edx, %%esi \n"
1287  "addl %%edi, %%edx \n"
1288  "decl %%edx \n" // edx is pointing at the end of the table (on last word)
1289  "orl %%eax, %%eax \n"
1290  "cmovnz %%ebp, %%esi \n" // if(c) esi=mask else esi=0
1291 
1292  "1: \n"
1293  "rorl %%cl, (%%ebx,%%edx,4) \n"
1294 
1295  "movl (%%ebx,%%edx,4), %%eax \n"
1296  "andl %%ebp, %%eax \n" // eax = bits that wrapped around from the bottom of this word
1297  "xorl %%eax, (%%ebx,%%edx,4) \n" // clear them in the word
1298  "orl %%esi, (%%ebx,%%edx,4) \n" // insert the carry from the previous word
1299  "movl %%eax, %%esi \n" // carry for the next (lower) word
1300 
1301  "decl %%edx \n"
1302  "decl %%edi \n"
1303  "jnz 1b \n"
1304 
1305  "roll $1, %%eax \n" // move bit 31 into bit 0
1306  "andl $1, %%eax \n" // top bit of the final carry group = last bit shifted out
1307 
1308  "pop %%ebp \n"
1309 
     // outputs: eax -> c; edi, esi and edx are scratch (dummy, dummy2, dummy3)
1310  : "=a" (c), "=D" (dummy), "=S" (dummy2), "=d" (dummy3)
     // inputs: eax = c, edi = b (word count), ebx = p1, ecx = bits
1311  : "0" (c), "1" (b), "b" (p1), "c" (bits)
1312  : "cc", "memory" );
1313 
1314  #endif
1315 
     // NOTE(review): TTMATH_LOGC is defined elsewhere; presumably a debug-logging hook - confirm
1316  TTMATH_LOGC("UInt::Rcr2", c)
1317 
1318  return c;
1319  }
1320 
1321 
1322 #ifdef _MSC_VER
1323 #pragma warning (default : 4731)
1324 #endif
1325 
1326 
1327  /*
1328  this method returns the index (0..31) of the highest set bit in one 32-bit word
1329  if 'x' is zero this method returns '-1'
1330  */
1331  template<uint value_size>
1332  sint UInt<value_size>::FindLeadingBitInWord(uint x)
1333  {
1334  sint result;
1335 
1336  #ifndef __GNUC__
1337  __asm
1338  {
1339  push eax
1340  push edx
1341 
1342  mov edx,-1
1343  bsr eax,[x] // eax = index of the highest set bit; ZF=1 if x is zero
1344  cmovz eax,edx // x was zero: replace the result with -1
1345  mov [result], eax
1346 
1347  pop edx
1348  pop eax
1349  }
1350  #endif
1351 
1352 
1353  #ifdef __GNUC__
1354  uint dummy;
1355 
1356  __asm__ (
1357 
1358  "movl $-1, %1 \n" // dummy = -1
1359  "bsrl %2, %0 \n" // result = index of the highest set bit; ZF=1 if x is zero
1360  "cmovz %1, %0 \n" // x was zero: result = -1
1361 
     // dummy is early-clobber (&) because it is written before x is read
1362  : "=r" (result), "=&r" (dummy)
1363  : "r" (x)
1364  : "cc" );
1365 
1366  #endif
1367 
1368  return result;
1369  }
1370 
1371 
1372 
1373  /*
1374  this method returns the index (0..31) of the lowest set bit in one 32-bit word
1375  if 'x' is zero this method returns '-1'
1376  */
1377  template<uint value_size>
1378  sint UInt<value_size>::FindLowestBitInWord(uint x)
1379  {
1380  sint result;
1381 
1382  #ifndef __GNUC__
1383  __asm
1384  {
1385  push eax
1386  push edx
1387 
1388  mov edx,-1
1389  bsf eax,[x] // eax = index of the lowest set bit; ZF=1 if x is zero
1390  cmovz eax,edx // x was zero: replace the result with -1
1391  mov [result], eax
1392 
1393  pop edx
1394  pop eax
1395  }
1396  #endif
1397 
1398 
1399  #ifdef __GNUC__
1400  uint dummy;
1401 
1402  __asm__ (
1403 
1404  "movl $-1, %1 \n" // dummy = -1
1405  "bsfl %2, %0 \n" // result = index of the lowest set bit; ZF=1 if x is zero
1406  "cmovz %1, %0 \n" // x was zero: result = -1
1407 
     // dummy is early-clobber (&) because it is written before x is read
1408  : "=r" (result), "=&r" (dummy)
1409  : "r" (x)
1410  : "cc" );
1411 
1412  #endif
1413 
1414  return result;
1415  }
1416 
1417 
1418 
/*
 * Sets one bit in a 32-bit word and reports the bit's previous state.
 *
 *   value   - the word to modify (updated through the reference)
 *   bit     - bit index; the TTMATH_ASSERT line states
 *             bit < TTMATH_BITS_PER_UINT
 *   returns - the previous state of that bit (0 or 1)
 *
 * The asm works on the local copy 'v', which is written back to 'value'
 * after the asm block.
 */
1430  template<uint value_size>
1431  uint UInt<value_size>::SetBitInWord(uint & value, uint bit)
1432  {
1433  TTMATH_ASSERT( bit < TTMATH_BITS_PER_UINT )
1434 
1435  uint old_bit;
1436  uint v = value;
1437 
1438  #ifndef __GNUC__
1439  __asm
1440  {
1441  push ebx
1442  push eax
1443 
1444  mov eax, [v]
1445  mov ebx, [bit]
1446  bts eax, ebx // CF = old state of the bit, then the bit is set
1447  mov [v], eax
1448 
1449  setc bl
1450  movzx ebx, bl // ebx = old bit as 0 or 1
1451  mov [old_bit], ebx
1452 
1453  pop eax
1454  pop ebx
1455  }
1456  #endif
1457 
1458 
1459  #ifdef __GNUC__
1460  __asm__ (
1461 
1462  "btsl %%ebx, %%eax \n" // CF = old state of the bit, then the bit is set
1463  "setc %%bl \n"
1464  "movzx %%bl, %%ebx \n" // ebx = old bit as 0 or 1
1465 
     // eax: v in and out; ebx: bit in, old_bit out
1466  : "=a" (v), "=b" (old_bit)
1467  : "0" (v), "1" (bit)
1468  : "cc" );
1469 
1470  #endif
1471 
1472  value = v;
1473 
1474  return old_bit;
1475  }
1476 
1477 
1478 
1479 
/*
 * Multiplies two 32-bit words and stores the 64-bit product as two words.
 *
 *   a, b        - the factors
 *   result_high - receives the upper 32 bits of a*b
 *   result_low  - receives the lower 32 bits of a*b
 *
 * Uses the x86 'mul' instruction, which leaves the product in edx:eax.
 */
1488  template<uint value_size>
1489  void UInt<value_size>::MulTwoWords(uint a, uint b, uint * result_high, uint * result_low)
1490  {
1491  /*
1492  we must use these temporary variables in order to inform the compiler
1493  that the values pointed to by result_high and result_low have changed
1494 
1495  this has no effect in visual studio but it's useful when
1496  using gcc and options like -Ox
1497  */
1498  uint result1_;
1499  uint result2_;
1500 
1501  #ifndef __GNUC__
1502 
1503  __asm
1504  {
1505  push eax
1506  push edx
1507 
1508  mov eax, [a]
1509  mul dword ptr [b] // edx:eax = eax * b
1510 
1511  mov [result2_], edx // high word
1512  mov [result1_], eax // low word
1513 
1514  pop edx
1515  pop eax
1516  }
1517 
1518  #endif
1519 
1520 
1521  #ifdef __GNUC__
1522 
1523  __asm__ (
1524 
1525  "mull %%edx \n" // edx:eax = eax * edx
1526 
     // eax: a in, low word out (result1_); edx: b in, high word out (result2_)
1527  : "=a" (result1_), "=d" (result2_)
1528  : "0" (a), "1" (b)
1529  : "cc" );
1530 
1531  #endif
1532 
1533 
1534  *result_low = result1_;
1535  *result_high = result2_;
1536  }
1537 
1538 
1539 
1540 
1541 
/*
 * Divides the 64-bit value a:b (a is the high word, b the low word) by the
 * 32-bit word c.
 *
 *   a, b  - high and low word of the dividend
 *   c     - the divisor; the TTMATH_ASSERT line states c != 0
 *   r     - receives the 32-bit quotient
 *   rest  - receives the remainder
 *
 * Uses the x86 'div' instruction (edx:eax / operand). That instruction
 * raises a divide error when the quotient does not fit in 32 bits, which
 * happens when a >= c.
 * NOTE(review): presumably callers guarantee a < c - this is not asserted here.
 */
1563  template<uint value_size>
1564  void UInt<value_size>::DivTwoWords(uint a, uint b, uint c, uint * r, uint * rest)
1565  {
1566  uint r_;
1567  uint rest_;
1568  /*
1569  r_ and rest_ are local temporaries: the asm writes its results to them
1570  and they are copied through the 'r' and 'rest' pointers afterwards
1571  */
1572 
1573  TTMATH_ASSERT( c != 0 )
1574 
1575  #ifndef __GNUC__
1576  __asm
1577  {
1578  push eax
1579  push edx
1580 
1581  mov edx, [a] // high word of the dividend
1582  mov eax, [b] // low word of the dividend
1583  div dword ptr [c] // eax = quotient, edx = remainder
1584 
1585  mov [r_], eax
1586  mov [rest_], edx
1587 
1588  pop edx
1589  pop eax
1590  }
1591  #endif
1592 
1593 
1594  #ifdef __GNUC__
1595 
1596  __asm__ (
1597 
1598  "divl %%ecx \n" // eax = edx:eax / ecx, edx = remainder
1599 
     // eax: b in, quotient out (r_); edx: a in, remainder out (rest_); ecx = c
1600  : "=a" (r_), "=d" (rest_)
1601  : "0" (b), "1" (a), "c" (c)
1602  : "cc" );
1603 
1604  #endif
1605 
1606 
1607  *r = r_;
1608  *rest = rest_;
1609 
1610  }
1611 
1612 
1613 
1614 } //namespace
1615 
1616 
1617 
1618 #endif //ifdef TTMATH_PLATFORM32
1619 #endif //ifndef TTMATH_NOASM
1620 #endif
a namespace for the TTMath library
Definition: ttmath.h:62
LibTypeCode
Definition: ttmathtypes.h:368
#define TTMATH_BITS_PER_UINT
Definition: ttmathtypes.h:207
unsigned int uint
Definition: ttmathtypes.h:186