clfmalloc.h
1 /*
2  * (c) Copyright 2007, IBM Corporation.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /*****************************************************************************
18  * Contact: Maged M Michael <magedm@us.ibm.com>
19  ****************************************************************************/
20 //#define DEBUGTRACE
21 
22 #ifndef CLMALLOC_H
23 #define CLMALLOC_H
24 
25 #ifdef __cplusplus
26 extern "C" {
27 #endif
28 
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <assert.h>
32 #include <pthread.h>
33 #include <string.h>
34 #include <sys/mman.h>
35 
36 
37 
38 
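/* Build-time platform selection: exactly one of the platform macros handled below
   (PPC32_AIX_XLC, PPC32_LINUX_GCC, PPC64_AIX_XLC, PPC64_AIX_GCC, PPC64_LINUX_GCC,
   AMD64_LINUX_GCC, IA32_LINUX_GCC, SPARC32_SOLARIS_SCC) should be defined.
   IA32_LINUX_GCC is hard-coded here as the default target. */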
39 #define IA32_LINUX_GCC
40 
41 
42 
43 
44 #ifdef PPC32_AIX_XLC
45 #define PPC
46 #define BIT32
47 #define AIX
48 #define XLC
49 #endif
50 
51 #ifdef PPC32_LINUX_GCC
52 #define PPC
53 #define BIT32
54 #define LINUX
55 #define GCC
56 #endif
57 
58 #ifdef PPC64_AIX_XLC
59 #define PPC
60 #define BIT64
61 #define AIX
62 #define XLC
63 #endif
64 
65 #ifdef PPC64_AIX_GCC
66 #define PPC
67 #define BIT64
68 #define AIX
69 #define GCC
70 #endif
71 
72 #ifdef PPC64_LINUX_GCC
73 #define PPC
74 #define BIT64
75 #define LINUX
76 #define GCC
77 #endif
78 
79 #ifdef AMD64_LINUX_GCC
80 #define X86
81 #define BIT64
82 #define LINUX
83 #define GCC
84 
 85  /* CASD: a CAS wider than the machine word is available, e.g. CMPXCHG8B on IA32 or CMPXCHG16B on AMD64 */
86 #define CASD
87 #endif
88 
89 #ifdef IA32_LINUX_GCC
90 #define X86
91 #define BIT32
92 #define LINUX
93 #define GCC
94 
95 #define CASD
96 #endif
97 
98 #ifdef SPARC32_SOLARIS_SCC
99 #define SPARC
100 #define BIT32
101 #define SOLARIS
102 #define SCC
103 #endif
104 
105  /*---------------------------------------------------------------------------*/
106 #define FALSE 0
107 #define TRUE 1
108 #define EIGHT 8
109  /*---------------------------------------------------------------------------*/
110 #define STATS(x)
111 #ifndef MIN
112 #define MIN(x,y) ((x)<=(y)?(x):(y))
113 #endif
114 #define MMAP(size) \
115  mmap(0,size,PROT_READ|PROT_WRITE,MAP_ANON|MAP_PRIVATE,-1,0)
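/* Anonymous, private, read-write mapping; some Linux headers spell MAP_ANON as MAP_ANONYMOUS. */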
116  /*---------------------------------------------------------------------------*/
117 #if defined(X86) || defined(SPARC)
118 #define REFCOUNT
119 #endif
120  /*---------------------------------------------------------------------------*/
121  //#define DEBUGTRACE
122  /*---------------------------------------------------------------------------*/
123 #ifdef DEBUGTRACE
124 #define ASSERT(x) if (!(x)) { debug(); assert(x);}
125 #else
126  //#define ASSERT(x) assert(x)
127 #define ASSERT(x)
128 #endif
129  /*---------------------------------------------------------------------------*/
130  /*---------------------------------------------------------------------------*/
131 #define IFUNI
132  //#define IFUNI if (!uniproc)
133  /*---------------------------------------------------------------------------*/
134 #if defined(X86) || defined(SPARC)
135 #define ret_t char
136 #define rc_ret_t char
137 #else
138 #define ret_t void *
139 #define rc_ret_t unsigned
140 #endif
141  /*---------------------------------------------------------------------------*/
142 #if defined(PPC)
143 
144 #define ISYNC IFUNI asm volatile ("isync")
145 #define LWSYNC IFUNI asm volatile ("lwsync")
146  //#define SYNC IFUNI asm volatile ("sync")
147 #define RBR
148 #define RBW
149 #define WBW
150 
151 #define LL64(ret,addr) asm volatile ("ldarx %0,0,%1;":"=r"(ret):"r"(addr))
152 #define LL32(ret,addr) asm volatile ("lwarx %0,0,%1;":"=r"(ret):"r"(addr))
153 #define SC64(ret,addr,new_val) \
154  asm volatile ( \
155  " stdcx. %2,0,%1;\n" \
156  " mfcr %0;\n" \
157  " andis. %0,%0,0x2000;" \
158  : "=r"(ret):"r"(addr),"r"(new_val) \
159  : "cr0","memory")
160 #define SC32(ret,addr,new_val) \
161  asm volatile ( \
162  " stwcx. %2,0,%1;\n" \
163  " mfcr %0;\n" \
164  " andis. %0,%0,0x2000;" \
165  : "=r"(ret):"r"(addr),"r"(new_val) \
166  : "cr0","memory")
167 #define CAS64(ret,addr,expval,new_val) \
168  do { \
169  unsigned long long oldval; \
170  LL64(oldval,addr); \
171  if (oldval != (unsigned long long)expval) { ret = 0; break; } \
172  SC64(ret,addr,new_val); \
173  } while (!ret);
174 #define CAS32(ret,addr,expval,new_val) \
175  do { \
176  unsigned oldval; \
177  LL32(oldval,addr); \
178  if (oldval != (unsigned)expval) { ret = 0; break; } \
179  SC32(ret,addr,new_val); \
180  } while (!ret);
181 
182 #ifdef BIT64
183 
184 #define LL(ret,addr) LL64(ret,addr)
185 #define SC(ret,addr,new_val) SC64(ret,addr,new_val)
186 #define CAS(ret,addr,expval,new_val) CAS64(ret,addr,expval,new_val)
187 #define ptrsize_t unsigned long
188 
189 #else /* PPC32 */
190 
191 #define LL(ret,addr) LL32(ret,addr)
192 #define SC(ret,addr,new_val) SC32(ret,addr,new_val)
193 #define CAS(ret,addr,expval,new_val) CAS32(ret,addr,expval,new_val)
194 #define ptrsize_t unsigned
195 
196 #endif
197 
198 #define anchorsize_t ptrsize_t
199 #define LLA(ret,addr) LL(ret,addr)
200 #define SCA(ret,addr,expval,new_val) SC(ret,addr,new_val)
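/* Note: on PPC, SCA deliberately ignores expval; store-conditional success depends on the
   reservation established by the preceding LLA (ldarx/lwarx), not on a value comparison. */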
201 
202 #elif defined(X86)
203 
204 #define CAS32(ret,addr,expval,new_val) \
205  __asm__ __volatile__( \
206  "lock; cmpxchg %2, %1; setz %0;\n" \
207  : "=a"(ret), "=m"(*(addr)) \
208  : "r" (new_val), "a"(expval) \
209  : "cc")
210 
211  char cas64(unsigned long long volatile * addr,
212  unsigned long long oval,
213  unsigned long long nval);
214 
215 #define CAS64(ret,addr,expval,new_val) \
216  ret = cas64((unsigned long long volatile *)addr, \
217  (unsigned long long)expval, \
218  (unsigned long long)new_val)
219 
220 #define ISYNC
221 #define LWSYNC
222 #define SYNC
223  // IFUNI asm volatile ("mfence")
224 #define RBR
225 #define RBW
226 #define WBW
227 
228 #ifdef BIT32
229 
230 #define ptrsize_t unsigned long
231 #define CAS(ret,addr,expval,new_val) CAS32(ret,addr,expval,new_val)
232 #define LLA(ret,addr) ret = *(addr);
233 #define SCA(ret,addr,expval,new_val) CAS32(ret,addr,expval,new_val)
234 #define anchorsize_t unsigned long
235 
236 #else /* X64 */
237 
238 #define ptrsize_t unsigned long long
239 #define CAS(ret,addr,expval,new_val) CAS64(ret,addr,expval,new_val)
240 #define LLA(ret,addr) ret = *(addr);
241 #define SCA(ret,addr,expval,new_val) CAS64(ret,addr,expval,new_val)
242 #define anchorsize_t unsigned long long
243 
244 #endif /* X86 32 and 64 */
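/* X86 has no LL/SC, so LLA is a plain load and SCA is a compare-and-swap against the value
   read by LLA; the anchor's tag field, bumped on every update, is what guards these emulated
   sequences against ABA. */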
245 
246 #elif defined(SPARC)
247 
248 #define CAS32(ret,addr,expval,new_val) \
249  ret = cas32((unsigned volatile *)addr,(unsigned)expval,(unsigned)new_val)
250 
251  static char cas32(unsigned volatile * addr, unsigned expval, unsigned new_val) {
252  asm volatile ( \
253  "casa [%2] 0x80, %3, %1\n" \
254  : "+m"(*(addr)), "+r"(new_val) \
255  : "r"(addr), "r" (expval) \
256  );
257  return (new_val == expval);
258  }
259 
260 #define CAS64(ret,addr,expval,new_val) \
261  ret = cas64((unsigned long long volatile *)addr,(unsigned long long)expval,(unsigned long long)new_val)
262 
263  static char cas64(unsigned long long volatile * addr, unsigned long long expval, unsigned long long new_val) {
264  asm volatile ( \
265  "casxa [%2] 0x80, %3, %1\n" \
266  : "+m"(*(addr)), "+r"(new_val) \
267  : "r"(addr), "r" (expval) \
268  );
269  return (new_val == expval);
270  }
271 
272 #define ISYNC
273 #define LWSYNC
274 #define SYNC
275 #define RBR IFUNI asm volatile ("membar #LoadLoad")
276 #define RBW IFUNI asm volatile ("membar #LoadStore")
277 #define WBW IFUNI asm volatile ("membar #StoreStore")
278 
279 #ifdef BIT32
280 
281 #define ptrsize_t unsigned
282 #define CAS(ret,addr,expval,new_val) CAS32(ret,addr,expval,new_val)
283 #define LLA(ret,addr) ret = *(addr);
284 #define SCA(ret,addr,expval,new_val) CAS32(ret,addr,expval,new_val)
285 #define anchorsize_t unsigned
286 
287 #else /* SPARC64 */
288 
289 #define ptrsize_t unsigned long long
290 #define CAS(ret,addr,expval,new_val) CAS64(ret,addr,expval,new_val)
291 #define LLA(ret,addr) ret = *(addr);
292 #define SCA(ret,addr,expval,new_val) CAS64(ret,addr,expval,new_val)
293 #define anchorsize_t unsigned long long
294 
295 #endif /* SPARC32 v SPARC64 */
296 
297 #else
298 #error architecture not defined
299 #endif /* architecture */
300  /*---------------------------------------------------------------------------*/
301 #if defined(X86) || defined(SPARC)
302 #define aret_t char
303 #else
304 #define aret_t void *
305 #endif
306  /*---------------------------------------------------------------------------*/
307 #define FAA(oldval,addr,val) \
308  do { \
309  ret_t ret; \
310  ptrsize_t new_val; \
311  oldval = *(addr); \
312  new_val = (ptrsize_t)oldval + val; \
 313  CAS(ret,addr,oldval,new_val); \
314  if (ret) break; \
315  } while (1);
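FAA is a fetch-and-add built from a CAS retry loop; it does not appear to be used elsewhere in this header (only the commented-out FAA32 trace call is similar). A minimal usage sketch, with a hypothetical counter that is not part of the allocator:

 static ptrsize_t volatile hits;          /* hypothetical shared counter */

 static void count_hit(void) {
     ptrsize_t before;
     FAA(before, &hits, 1);               /* atomically bumps hits; before receives the pre-increment value */
 }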
316  /*---------------------------------------------------------------------------*/
317  /* Reference counting */
318 #define SAFE_READ(x) rc_safe_read(&x)
319 #define DESC_RETIRE(x) rc_desc_release(x)
320 #define RC_INIT(x) x->rc = 1
321 #define RC_RELEASE(x) rc_desc_release(x)
322 
323 /*---------------------------------------------------------------------------*/
324  typedef union anchor {
325 
326  struct {
327  unsigned head : 7;
328  unsigned count : 7;
329  unsigned active : 1;
330  unsigned valid : 1;
331  anchorsize_t tag : 8 * sizeof (anchorsize_t) - 16;
332  } sep;
333  anchorsize_t all;
334  } anchor_t;
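/* Anchor fields, packed so the whole anchor fits in one CAS-able word:
     head   - index of the first block on the superblock's free list
     count  - number of free blocks available for reservation
     active - set while the superblock is (or is about to become) a heap's Active superblock
     valid  - cleared when the empty superblock is being retired
     tag    - version stamp bumped on every anchor update to avoid ABA */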
335 
336  /*---------------------------------------------------------------------------*/
337  typedef struct desc {
338  anchor_t volatile Anchor;
339  struct desc * volatile Next;
340  void * volatile SB;
341  struct procheap * volatile Heap;
342  struct desc * volatile Pdesc;
343  struct desc * volatile Ptr;
344  int sz;
345  int maxcount;
346  int volatile rc;
347  char pad[128
348  - sizeof (anchor_t)
349  - 5 * sizeof (void*)
350  - 3 * sizeof (int)
351  ];
352  } desc_t;
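/* Superblock descriptor, padded to exactly 128 bytes (checked in clfmalloc_init) so that
   descriptors are 128-byte aligned; in the PLDI2004 path this is what allows credits to be
   packed into the low bits of a heap's Active pointer. SB points to the superblock,
   Heap/sz/maxcount cache the owning size-class parameters, and rc is the reference count
   used when REFCOUNT is defined. */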
353 
354  /*---------------------------------------------------------------------------*/
355  typedef struct procheap {
356  desc_t * volatile Active;
357  desc_t * volatile Partial;
358  struct sizeclass * sc;
359  char pad[32 - 3 * sizeof (void*) ];
360  } procheap_t;
361 
362  /*---------------------------------------------------------------------------*/
363  typedef struct sizeclass {
 364  procheap_t procheap; /* heap used directly when numprocheaps == 1 */
 365  desc_t * volatile PartialHead;
366  desc_t * volatile PartialTail;
367  unsigned numprocheaps;
368  unsigned sz;
369  unsigned sbsize;
370  unsigned maxcount;
371  char pad[128
372  - sizeof (procheap_t)
373  - 2 * sizeof (void*)
374  - 4 * sizeof (unsigned)
375  ];
376  } sizeclass_t;
377  /*---------------------------------------------------------------------------*/
378 #define MAXSIZECLASSES 32 /* max size classes */
379 
380 typedef struct {
381  sizeclass_t sizeclass[MAXSIZECLASSES];
382  procheap_t procheap[1024][MAXSIZECLASSES];
383  /* not really going to allocate that much */
384  } controlblock_t;
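/* The control block reserves room for up to 1024 heap rows, but clfmalloc_init only maps
   4096 + numprocheaps*1024 bytes: the sizeclass array plus the heap rows actually used. */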
385  /*---------------------------------------------------------------------------*/
386 #define MAXCREDITS 128
387 #define CREDITSMASK 127
388 #define DESCBATCHSIZE 128
389 #define DESCSBSIZE DESCBATCHSIZE*sizeof(desc_t)
390 #define HYPERBLOCKSIZE 1024*1024
391 #define DEFAULTNUMHEAPS 64
392 #define MAXNUMHEAPS 512
393 #define NUMSIZECLASSES 27
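/* MAXCREDITS/CREDITSMASK above are used by the PLDI2004 allocation path, where a credit
   count (0..MAXCREDITS-1) is packed into the low 7 bits of heap->Active; this works because
   descriptors are 128-byte aligned. Descriptors are mmapped DESCBATCHSIZE at a time. */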
394  /*---------------------------------------------------------------------------*/
395  //static char pad1[128];
396  static controlblock_t * volatile controlblock = NULL;
397  static unsigned numprocheaps = DEFAULTNUMHEAPS;
398  static unsigned heapidmask;
399  //static char pad2[128];
400  static unsigned scsz[NUMSIZECLASSES] = {
401  16, 24, 32, 40, 48, 64, 80, 96,
402  128, 160, 192, 256, 320, 384, 512, 640,
403  768, 1024, 1280, 2048, 4096, 8192, 16384, 32768,
404  65536, 131072, 262144
405  };
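/* scsz lists the 27 block size classes. Every block carries an 8-byte header (EIGHT), so a
   100-byte request, for example, needs 108 bytes and is served from the 128-byte class;
   requests larger than 262144-8 bytes skip the heaps and are mmapped directly by malloc(). */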
406  //static char pad3[128];
407  static desc_t * volatile DescAvail = NULL;
408  static desc_t * volatile HyperblockHead = NULL;
409  /*---------------------------------------------------------------------------*/
410  /*---------------------------------------------------------------------------*/
411 #define DOTRACE(a,b,c,d,e,f,g) \
412  if (!trace_stop) { \
413  unsigned id; \
414  unsigned i; \
415  id = (pthread_self() & 0xf)-1; /* only for AIX */ \
416  i = trace_ind[id][0]++ & TRACEMASK; \
417  tr[id][i].op = a; \
418  tr[id][i].sz = b; \
419  tr[id][i].desc = d; \
420  tr[id][i].heap = f; \
421  tr[id][i].anchor.all = g; \
422  }
423  /*
424  tr[id][i].sb = e; \
425  tr[id][i].id = id; \
426  tr[id][i].addr = c; \
427  unsigned ind; \
428  FAA32(ind,&trace_ind,1); \
429  */
430 #ifdef DEBUGTRACE
431 #define TRACE(a,b,c,d,e,f,g) std::cout<<a<<","<<b<<","<<c<<","<<d<<","<<e<<","<<f<<","<<g<<"\n";
432 #else
433 #define TRACE(a,b,c,d,e,f,g)
434 #endif
435  /*---------------------------------------------------------------------------*/
436 #ifdef DEBUGTRACE
437 
438 typedef struct trace {
439  unsigned id;
440  unsigned op;
441  unsigned sz;
442  unsigned pad;
443  void * addr;
444  void * desc;
445  void * sb;
446  void * heap;
 447  anchor_t anchor;
 448  } trace_t;
449  /*---------------------------------------------------------------------------*/
450 #define MAXTRACE 0x8000
451 #define TRACEMASK MAXTRACE-1
452 #define OP_MAKE_ACTIVE 1
453 #define OP_MADE_ACTIVE 2
454 #define OP_NOT_ACTIVE 3
455 #define OP_PARTIAL_STR 4
456 #define OP_PARTIAL_RSRV 5
457 #define OP_PARTIAL_END 6
458 #define OP_new__STR 7
459 #define OP_NOT_new_ 8
460 #define OP_new__END 9
461 #define OP_MALLOC_STR 10
462 #define OP_REGULAR_RSRV 11
463 #define OP_REGULAR_END 12
464 #define OP_FREE_STR 13
465 #define OP_FREE_END 14
466 #define OP_LIST_ENQ_STR 15
467 #define OP_LIST_ENQ_END 16
468 #define OP_PARTIAL_PUSH 17
469 #define OP_PARTIAL_POP_SLT 18
470 #define OP_PARTIAL_POP_LST 19
471 #define OP_PARTIAL_POPPED 20
472 #define OP_DESC_REMOVE_SLT 21
473 #define OP_DESC_REMOVE_LST 22
474 #define OP_DESC_REMOVED 23
475 #define OP_DESC_NOT_REMOVED 24
476  /*---------------------------------------------------------------------------*/
477  unsigned volatile trace_ind[3][32];
478  ptrsize_t volatile trace_stop = 0;
479  unsigned volatile trace_dumped = 0;
480  trace_t tr[3][MAXTRACE];
481 
482  /*---------------------------------------------------------------------------*/
483  void debug() {
484  unsigned total, i, j, k;
485  FILE * fd;
486  ret_t ret;
487  char opstr[40];
488  char trfile[40];
489 
490  printf("%d assertion failed .....\n", pthread_self());
491  CAS(ret, &trace_stop, 0, 1);
492  if (!ret) {
493  while (!trace_dumped);
494  return;
495  }
496  printf("%d dumping trace .....\n", pthread_self());
497  strcpy(trfile, "trace");
498  fd = fopen(trfile, "w");
499  assert(fd);
500  for (k = 0; k < 3; k++) {
501  if (trace_ind[k][0] <= MAXTRACE) {
502  j = 0;
503  total = trace_ind[k][0];
504  } else {
505  j = trace_ind[k][0] % MAXTRACE;
506  total = MAXTRACE;
507  }
 508  printf("%d total trace = %u %d\n", k, trace_ind[k][0], total);
509  for (i = 1; i <= total; i++) {
510  switch (tr[k][j].op) {
511  case OP_MAKE_ACTIVE: strcpy(opstr, "MAKE_ACTIVE");
512  break;
513  case OP_MADE_ACTIVE: strcpy(opstr, "MADE_ACTIVE");
514  break;
515  case OP_NOT_ACTIVE: strcpy(opstr, "NOT_ACTIVE");
516  break;
517  case OP_PARTIAL_STR: strcpy(opstr, "PARTIAL_STR");
518  break;
519  case OP_PARTIAL_RSRV: strcpy(opstr, "PARTIAL_RSRV");
520  break;
521  case OP_PARTIAL_END: strcpy(opstr, "PARTIAL_END");
522  break;
523  case OP_new__STR: strcpy(opstr, "new__STR");
524  break;
525  case OP_NOT_new_: strcpy(opstr, "NOT_new_");
526  break;
527  case OP_new__END: strcpy(opstr, "new__END");
528  break;
529  case OP_MALLOC_STR: strcpy(opstr, "MALLOC_STR");
530  break;
531  case OP_REGULAR_RSRV: strcpy(opstr, "REGULAR_RSRV");
532  break;
533  case OP_REGULAR_END: strcpy(opstr, "REGULAR_END");
534  break;
535  case OP_FREE_STR: strcpy(opstr, "FREE_STR");
536  break;
537  case OP_FREE_END: strcpy(opstr, "FREE_END");
538  break;
539  case OP_LIST_ENQ_STR: strcpy(opstr, "LIST_ENQ_STR");
540  break;
541  case OP_LIST_ENQ_END: strcpy(opstr, "LIST_ENQ_END");
542  break;
543  case OP_PARTIAL_PUSH: strcpy(opstr, "PARTIAL_PUSH");
544  break;
545  case OP_PARTIAL_POP_SLT: strcpy(opstr, "PARTIAL_POP_SLT");
546  break;
547  case OP_PARTIAL_POP_LST: strcpy(opstr, "PARTIAL_POP_LST");
548  break;
549  case OP_PARTIAL_POPPED: strcpy(opstr, "PARTIAL_POPPED");
550  break;
551  case OP_DESC_REMOVE_SLT: strcpy(opstr, "DESC_REMOVE_SLT");
552  break;
553  case OP_DESC_REMOVE_LST: strcpy(opstr, "DESC_REMOVE_LST");
554  break;
555  case OP_DESC_REMOVED: strcpy(opstr, "DESC_REMOVED");
556  break;
557  case OP_DESC_NOT_REMOVED: strcpy(opstr, "DESC_NOT_REMOVED");
558  break;
559  default: assert(0);
560  }
561  fprintf(fd, "%5d ", i);
562  /* fprintf(fd,"p%d ",tr[k][j].id); */
563  fprintf(fd, "p%d ", k);
564  fprintf(fd, "sz=%5d ", tr[k][j].sz);
565  //fprintf(fd,"a = 0x%15llx ",tr[k][j].addr);
566  fprintf(fd, "d=%6llx ", (ptrsize_t) tr[k][j].desc % 0x1000000);
567  //fprintf(fd,"sb = 0x%15llx ",tr[k][j].sb);
568  fprintf(fd, "h=%5llx ", (ptrsize_t) tr[k][j].heap % 0x100000);
569  fprintf(fd, "<%2d,", tr[k][j].anchor.sep.head);
570  fprintf(fd, "%05x,", tr[k][j].anchor.sep.tag % 0x100000);
571  fprintf(fd, "%2d,", tr[k][j].anchor.sep.count);
572  fprintf(fd, "%d,", tr[k][j].anchor.sep.active);
573  fprintf(fd, "%d> ", tr[k][j].anchor.sep.valid);
574  fprintf(fd, "%s ", opstr);
575  fprintf(fd, "\n");
576  j = (j + 1) % MAXTRACE;
577  }
578  }
579  fflush(fd);
580  fclose(fd);
581  trace_dumped = 1;
582  }
583 #endif
584  /*---------------------------------------------------------------------------*/
585  /*---------------------------------------------------------------------------*/
586 #if defined(PPC)
587  void __malloc_start__()
588 #else
589 
590 __attribute__((constructor)) void clfmalloc_init()
591 #endif
592  {
593  controlblock_t * cb;
594  sizeclass_t * sc;
595  ret_t ret;
596  unsigned size;
597  unsigned i;
598 
599 
600  assert(sizeof (desc_t) == 128);
601  assert(sizeof (sizeclass_t) == 128);
602  assert(sizeof (procheap_t) == 32);
603  assert(NUMSIZECLASSES <= MAXSIZECLASSES);
604 
605 
606  /* Set number of proc heaps */
607  char * nphstr = getenv("CLFMALLOC_NUMHEAPS");
608  if (nphstr) {
609  int nph = atoi(nphstr);
610  if ((nph >= 1) && (nph <= MAXNUMHEAPS)) {
611  numprocheaps = 1;
612  while (numprocheaps < nph)
613  numprocheaps <<= 1;
614  }
615  }
616  heapidmask = numprocheaps - 1;
617 
618  /* allocate control block */
619  size = 4096 + numprocheaps * 1024;
620  if ((cb = (controlblock_t *) MMAP(size)) == MAP_FAILED) {
621  perror("clfmalloc_init mmap failed\n");
622  }
623 
624  for (i = 0; i < NUMSIZECLASSES; i++) {
625  sc = &cb->sizeclass[i];
626  sc->PartialHead = NULL;
627  sc->PartialTail = NULL;
628  sc->sz = scsz[i];
629  if (sc->sz <= 32) {
630  sc->sbsize = 2 * 1024;
631  } else if (sc->sz <= 64) {
632  sc->sbsize = 4 * 1024;
633  } else if (sc->sz <= 128) {
634  sc->sbsize = 8 * 1024;
635  } else if (sc->sz <= 256) {
636  sc->sbsize = 16 * 1024;
637  } else if (sc->sz <= 512) {
638  sc->sbsize = 32 * 1024;
639  } else if (sc->sz <= (32 * 1024)) {
640  sc->sbsize = 64 * 1024;
641  } else {
642  sc->sbsize = HYPERBLOCKSIZE;
643  }
644  sc->maxcount = sc->sbsize / sc->sz;
645  if ((i < 20) && (numprocheaps > 1)) {
646  unsigned j;
647  sc->numprocheaps = numprocheaps;
648  for (j = 0; j < sc->numprocheaps; j++) {
649  cb->procheap[j][i].Active = NULL;
650  cb->procheap[j][i].Partial = NULL;
651  cb->procheap[j][i].sc = sc;
652  }
653  } else {
654  sc->numprocheaps = 1;
655  sc->procheap.Active = NULL;
656  sc->procheap.Partial = NULL;
657  sc->procheap.sc = sc;
658  }
659  }
660  LWSYNC; /* wbw */
661  WBW;
662  CAS(ret, &controlblock, NULL, cb);
663  if (!ret) {
664  if (munmap((void *) cb, size)) {
665  perror("clfmalloc_init munmap failed\n");
666  }
667  }
668  }
669 
670  /*---------------------------------------------------------------------------*/
671  static void desc_push(desc_t * desc) {
672  ret_t ret;
673  do {
674  desc_t * head = DescAvail;
675  desc->Next = head;
676  LWSYNC; /* wbw */
677  WBW;
678  CAS(ret, &DescAvail, head, desc); /* no ABA */
679  } while (!ret);
680  }
681  /*---------------------------------------------------------------------------*/
682 
683  /*---------------------------------------------------------------------------*/
684  static void rc_desc_release(desc_t * desc) {
685  int old, new_;
686  rc_ret_t ret;
687 
688  LWSYNC; /* rbw */
689  RBW;
690  ASSERT(desc);
691  do {
692  old = desc->rc;
693  new_ = old - 1;
694  CAS32(ret, &desc->rc, old, new_);
695  } while (!ret);
696  ASSERT(new_ >= 0);
697  if (new_ == 0) {
698  /* Probably store is sufficient */
699  do {
700  old = desc->rc;
701  new_ = old + 1;
702  CAS32(ret, &desc->rc, old, new_);
703  } while (!ret);
704  desc_push(desc);
705  }
706  }
707 
708  /*---------------------------------------------------------------------------*/
709  static desc_t * rc_safe_read(desc_t * volatile * addr) {
710  desc_t * desc;
711  int old, new_;
712  rc_ret_t ret;
713 
714  while (1) {
715  desc = *addr;
716  if (desc == NULL) return NULL;
717  do {
718  old = desc->rc;
719  new_ = old + 1;
720  if (old == 0) break;
721  CAS32(ret, &desc->rc, old, new_);
722  } while (!ret);
723  if (old == 0) continue;
724  ISYNC; /* rbr */
725  RBR;
726  if (desc == *addr) return desc;
727  rc_desc_release(desc);
728  }
729  }
730  /*---------------------------------------------------------------------------*/
731 
732  /*---------------------------------------------------------------------------*/
733  static desc_t * desc_alloc() {
734  desc_t * desc;
735  ret_t ret;
736 
737  while (1) {
738  desc = SAFE_READ(DescAvail);
739  if (desc) {
740  desc_t * next = desc->Next;
741  CAS(ret, &DescAvail, desc, next); /* ABA */
742  RC_RELEASE(desc);
743  if (ret) {
744  break;
745  }
746  } else {
747  desc_t * ptr;
748  if ((desc = (desc_t *) MMAP(DESCSBSIZE)) == MAP_FAILED) {
749  perror("desc_alloc mmap failed\n");
750  }
751  RC_INIT(desc);
752  for (ptr = desc + 1;
753  (ptrsize_t) (ptr + 1)<(ptrsize_t) (desc) + DESCSBSIZE;
754  ptr++) {
755  ptr->Next = ptr + 1;
756  RC_INIT(ptr);
757  }
758  ptr->Next = NULL;
759  RC_INIT(ptr);
760  LWSYNC; /* wbw */
761  WBW;
762  CAS(ret, &DescAvail, NULL, (desc_t *) (desc + 1));
763  if (ret)
764  break;
765  if (munmap((void *) desc, DESCSBSIZE)) {
766  perror("desc_alloc munmap failed\n");
767  }
768  }
769  }
770  desc->Ptr = (desc_t *) 0xfedcba98;
771  desc->Heap = (procheap_t *) 0xfedcba98;
772  return desc;
773  }
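/* Descriptors are carved out of DESCSBSIZE mmapped batches and recycled through the
   DescAvail LIFO; they are not returned to the OS except when a batch loses the install
   race. The 0xfedcba98 values appear to be poison/debug markers for uninitialized fields. */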
774  /*---------------------------------------------------------------------------*/
775  static void * super_malloc(unsigned sz);
776  static void super_free(void * ptr);
777 
778  /*---------------------------------------------------------------------------*/
779  static void sb_alloc(desc_t * desc) {
780  sizeclass_t * sc;
781  void ** addr;
782  unsigned sz;
783 
784  sc = desc->Heap->sc;
785  sz = sc->sz;
786  if (sc->sbsize < HYPERBLOCKSIZE) {
787  ptrsize_t num;
788  addr = (void**) super_malloc(sc->sbsize - EIGHT);
789  num = (ptrsize_t) addr;
790  num -= EIGHT;
791  addr = (void**) num;
792  desc->Pdesc = *(desc_t**) addr;
793  } else {
794  /* sbsize == 1M */
795  desc->Pdesc = NULL;
796  do {
797  desc_t * head;
798  desc_t * next;
799  ret_t ret;
800 
801  head = SAFE_READ(HyperblockHead);
802  if (head) {
803  next = head->Next;
804  CAS(ret, &HyperblockHead, head, next); /* ABA */
805  RC_RELEASE(head);
806  if (ret) {
807  addr = (void**) head->SB;
808  DESC_RETIRE(head);
809  break;
810  }
811  } else {
812  ASSERT(sc->sbsize == 0x100000);
813  if ((addr = (void **) MMAP(sc->sbsize)) == MAP_FAILED) {
814  perror("sb_alloc mmap failed\n");
815  }
816  break;
817  }
818  } while (1);
819  }
820  /* prepare sb - make linked list - set header to desc */
821  {
822  unsigned i;
823  desc_t ** ptr1;
824  unsigned * ptr2;
825  ptrsize_t num1;
826  ptrsize_t num2;
827 
828  num1 = (ptrsize_t) addr;
829  for (i = 0; i < sc->maxcount; i++) {
830  ptr1 = (desc_t**) num1;
831  *ptr1 = desc;
832  num2 = num1 + EIGHT;
833  ptr2 = (unsigned*) num2;
834  *ptr2 = i + 1;
835  num1 += sz;
836  }
837  }
838  desc->SB = addr;
839  }
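/* Superblock layout after sb_alloc: each sz-byte block starts with an 8-byte header that
   points back to this descriptor; while a block is free, the word at offset 8 holds the
   index of the next free block, forming the free list that Anchor.sep.head indexes. */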
840 
841  /*---------------------------------------------------------------------------*/
842  static void sb_dealloc(void * sb, desc_t * pdesc) {
843  if (pdesc) {
844  /* sbsize <= 64K */
845  *(desc_t**) sb = pdesc;
846  sb = (char*) sb + EIGHT;
847  super_free(sb);
848  } else {
849  /* sbsize is 1 MB */
850  ret_t ret;
851 
852  desc_t * desc = desc_alloc();
853  desc->SB = sb;
854  do {
855  desc_t * head;
856  head = HyperblockHead;
857  desc->Next = head;
858  LWSYNC; /* wbw */
859  WBW;
860  CAS(ret, &HyperblockHead, head, desc); /* no ABA */
861  } while (!ret);
862  }
863  }
864  /*---------------------------------------------------------------------------*/
865 #ifdef REFCOUNT
866 
867 /*---------------------------------------------------------------------------*/
868  static void list_enqueue(sizeclass_t * sc, desc_t * desc) {
869  ret_t ret;
870  ASSERT(sc);
871  desc_t * aux = desc_alloc();
872  aux->Ptr = desc;
873  do {
874  desc_t * head = sc->PartialHead;
875  aux->Next = head;
876  LWSYNC; /* wbw */
877  WBW;
878  CAS(ret, &sc->PartialHead, head, aux);
879  } while (!ret);
880  TRACE(OP_LIST_ENQ_END, desc->sz, 0, desc, desc->SB, desc->Heap, desc->Anchor.all);
881  }
882 
883  /*---------------------------------------------------------------------------*/
884  static desc_t * list_pop(sizeclass_t * sc) {
885  /* pop from central list */
886  desc_t * desc;
887  ret_t ret;
888  TRACE(OP_PARTIAL_POP_LST, 0, 0, 0, 0, 0, 0);
889  do {
890  desc_t * head = SAFE_READ(sc->PartialHead);
891  if (!head)
892  return NULL;
893  desc_t * next = head->Next;
894  desc = head->Ptr;
895  CAS(ret, &sc->PartialHead, head, next);
896  RC_RELEASE(head);
897  if (ret) {
898  DESC_RETIRE(head);
899  ASSERT(desc);
900  break;
901  }
902  } while (1);
903  TRACE(OP_PARTIAL_POPPED, desc->sz, 0, desc, desc->SB, desc->Heap, desc->Anchor.all);
904  return desc;
905 
906  }
907 
908  /*---------------------------------------------------------------------------*/
909  static void partial_push(procheap_t * heap, desc_t * desc) {
910  ret_t ret;
911  desc_t * desc2;
912 
913  ASSERT(desc);
914  ASSERT(heap);
915  ASSERT(heap->sc);
916  do {
917  desc2 = heap->Partial;
918  CAS(ret, &heap->Partial, desc2, desc); /* no ABA */
919  } while (!ret);
920  TRACE(OP_PARTIAL_PUSH, desc->sz, 0, desc, desc->SB, heap, desc->Anchor.all);
921  if (!desc2)
922  return;
923  if (!desc2->Anchor.sep.valid) {
924  DESC_RETIRE(desc2);
925  } else {
926  list_enqueue(heap->sc, desc2);
927  }
928  }
929 
930  /*---------------------------------------------------------------------------*/
931  static desc_t * partial_pop(procheap_t* heap) {
932  ret_t ret;
933  TRACE(OP_PARTIAL_POP_SLT, 0, 0, 0, 0, heap, 0);
934  ASSERT(heap);
935  ASSERT(heap->sc);
936  do {
937  desc_t * desc = heap->Partial;
938  if (!desc)
939  break;
940  CAS(ret, &heap->Partial, desc, NULL); /* no ABA */
941  if (ret) {
942  return desc;
943  }
944  } while (1);
945  /* pop from central list */
946  return list_pop(heap->sc);
947  }
948 
949  /*---------------------------------------------------------------------------*/
950  static void list_desc_remove(procheap_t * heap) {
951  }
952  /*---------------------------------------------------------------------------*/
953 #else /* LLSC */
954 
955 /*---------------------------------------------------------------------------*/
956  static void list_enqueue(desc_t * desc) {
957  sizeclass_t * sc;
958 
959  //TRACE(OP_LIST_ENQ_STR,desc->sz,0,desc,desc->SB,desc->Heap,desc->Anchor.all);
960  sc = desc->Heap->sc;
961  ASSERT(sc);
962  desc->Next = NULL;
963  LWSYNC;
964  while (1) {
965  desc_t * tail;
966  desc_t * head;
967  desc_t * next;
968  void * ret;
969 
970  tail = sc->PartialTail;
971  if (tail == NULL) {
972  LL(tail, &sc->PartialTail);
973  if (tail != NULL)
974  continue;
975  SC(ret, &sc->PartialTail, desc); /* no ABA */
976  if (!ret)
977  continue;
978  CAS(ret, &sc->PartialHead, NULL, desc);
979  break;
980  }
981  /* tail != NULL */
982  if (sc->PartialHead == NULL) {
983  LL(head, &sc->PartialHead);
984  if (head != NULL)
985  continue;
986  ISYNC;
987  if (sc->PartialTail != tail)
988  continue;
989  SC(ret, &sc->PartialHead, tail); /* no ABA */
990  continue;
991  }
992  /* tail != NULL && head != NULL */
993  next = tail->Next;
994  if (next != NULL) {
995  /* help previous enqueue */
996  LL(ret, &sc->PartialTail);
997  if (ret != tail)
998  continue;
999  ISYNC;
1000  if (tail->Next != next)
1001  continue;
1002  SC(ret, &sc->PartialTail, next); /* ABA */
1003  continue;
1004  }
1005  /* next == NULL */
1006  LL(next, &tail->Next);
1007  if (next != NULL)
1008  continue;
1009  ISYNC;
1010  if (sc->PartialTail != tail)
1011  continue;
1012  SC(ret, &tail->Next, desc); /* ABA */
1013  if (!ret)
1014  continue;
1015  LL(ret, &sc->PartialTail);
1016  if (ret != tail)
1017  break;
1018  ISYNC;
1019  if (tail->Next != desc)
1020  break;
1021  SC(ret, &sc->PartialTail, desc); /* ABA */
1022  break;
1023  }
1024  TRACE(OP_LIST_ENQ_END, desc->sz, 0, desc, desc->SB, desc->Heap, desc->Anchor.all);
1025  }
1026 
1027  /*---------------------------------------------------------------------------*/
1028  static void partial_push(procheap_t * heap, desc_t * desc) {
1029  ret_t ret;
1030  desc_t * desc2;
1031 
1032  //ASSERT(desc);
1033  //ASSERT(heap);
1034  //ASSERT(heap->sc);
1035  do {
1036  LL(desc2, &heap->Partial);
1037  SC(ret, &heap->Partial, desc); /* no ABA */
1038  } while (!ret);
1039  TRACE(OP_PARTIAL_PUSH, desc->sz, 0, desc, desc->SB, heap, desc->Anchor.all);
1040  if (!desc2) return;
1041  if (!desc2->Anchor.sep.valid) {
1042  desc_push(desc2); /* LL/SC no rc or hp */
1043  return;
1044  } else {
1045  list_enqueue(desc2);
1046  }
1047  }
1048 
1049  /*---------------------------------------------------------------------------*/
1050  static desc_t* partial_pop(procheap_t* heap) {
1051  desc_t* desc;
1052  desc_t* next;
1053  sizeclass_t* sc;
1054  void* ret;
1055  anchor_t tmp;
1056 
1057  TRACE(OP_PARTIAL_POP_SLT, 0, 0, 0, 0, heap, 0);
1058  ASSERT(heap);
1059  ASSERT(heap->sc);
1060  do {
1061  LL(desc, &heap->Partial);
1062  if (!desc)
1063  break;
1064  SC(ret, &heap->Partial, NULL); /* no ABA */
1065  if (ret) {
1066  /*
1067  tmp.all = desc->Anchor.all;
1068  TRACE(OP_PARTIAL_POPPED,desc->sz,0,desc,desc->SB,heap,tmp.all);
1069  ASSERT(!tmp.sep.active);
1070  ASSERT(tmp.sep.count > 0);
1071  */
1072  return desc;
1073  }
1074  } while (1);
1075  /* pop from central list */
1076  TRACE(OP_PARTIAL_POP_LST, 0, 0, 0, 0, heap, 0);
1077  sc = heap->sc;
1078  do {
1079  LL(desc, &sc->PartialHead);
1080  if (!desc)
1081  return NULL;
1082  next = desc->Next;
1083  if (next == NULL) {
1084  if (!desc->Anchor.sep.valid)
1085  return NULL;
1086  else {
1087  desc_t * desc2;
1088  desc2 = desc_alloc();
1089  desc2->Anchor.sep.valid = FALSE;
1090  desc2->Heap = heap; /* needed */
1091  list_enqueue(desc2);
1092  }
1093  continue;
1094  }
1095  ISYNC;
1096  if (sc->PartialTail == desc) {
1097  /* help the enqueue */
1098  LL(ret, &sc->PartialTail);
1099  if (ret != desc)
1100  continue;
1101  ISYNC;
1102  if (desc->Next != next)
1103  continue;
1104  SC(ret, &sc->PartialTail, next); /* ABA */
1105  continue;
1106  }
1107  SC(ret, &sc->PartialHead, next); /* ABA */
1108  if (ret) {
1109  break;
1110  }
1111  } while (1);
1112  TRACE(OP_PARTIAL_POPPED, desc->sz, 0, desc, desc->SB, heap, desc->Anchor.all);
1113  return desc;
1114  }
1115 
1116  /*---------------------------------------------------------------------------*/
1117  static void list_desc_remove(procheap_t * heap) {
1118  desc_t * head;
1119  desc_t * next;
1120  sizeclass_t * sc;
1121  void * ret;
1122  unsigned nonempty = 0;
1123 
1124  TRACE(OP_DESC_REMOVE_SLT, 0, 0, 0, 0, heap, 0);
1125  ASSERT(heap);
1126  ASSERT(heap->sc);
1127  do {
1128  LL(head, &heap->Partial);
1129  if (!head)
1130  break;
1131  if (head->Anchor.sep.valid)
1132  break;
1133  SC(ret, &heap->Partial, NULL); /* can recover later from ABA */
1134  if (ret) {
1135  ISYNC;
1136  if (head->Anchor.sep.valid) {
 1137  /* ABA happened - put it back */
1138  partial_push(heap, head);
1139  break;
1140  } else {
1141  /* really invalid */
1142  TRACE(OP_DESC_REMOVED, head->sz, 0, head, head->SB, heap, head->Anchor.all);
1143  desc_push(head); /****/
1144  return;
1145  }
1146  }
1147  } while (1);
1148  /* pop from global heap */
1149  TRACE(OP_DESC_REMOVE_LST, 0, 0, 0, 0, heap, 0);
1150  sc = heap->sc;
1151  do {
1152  LL(head, &sc->PartialHead);
1153  if (!head)
1154  break;
1155  next = head->Next;
1156  if (!next)
1157  break;
1158  ISYNC;
1159  if (sc->PartialTail != head) {
1160  ISYNC;
1161  if (head->Next != next)
1162  continue;
1163  SC(ret, &sc->PartialHead, next); /* ABA */
1164  if (ret) {
1165  if (head->Anchor.sep.valid) {
1166  ASSERT(head->Heap);
1167  ASSERT(head->Heap->sc);
1168  list_enqueue(head);
1169  if (nonempty)
1170  break;
1171  nonempty = 1;
1172  } else {
1173  TRACE(OP_DESC_REMOVED, head->sz, 0, head, head->SB, heap, head->Anchor.all);
1174  desc_push(head); /***/
1175  return;
1176  }
1177  }
1178  } else {
1179  /* help the enqueue */
1180  LL(ret, &sc->PartialTail);
1181  if (ret != head)
1182  continue;
1183  ISYNC;
1184  if (head->Next != next)
1185  continue;
1186  SC(ret, &sc->PartialTail, next); /* ABA */
1187  }
1188  } while (1);
1189  TRACE(OP_DESC_NOT_REMOVED, 0, 0, 0, 0, heap, 0);
1190  }
1191  /*---------------------------------------------------------------------------*/
1192 #endif /* REFCOUNT or LLSC */
1193  /*---------------------------------------------------------------------------*/
1194 #ifdef PLDI2004
1195 
1196 /*---------------------------------------------------------------------------*/
1197  static void make_active(procheap_t * heap, desc_t * desc, unsigned morecredits) {
1198  ptrsize_t new_active;
1199  anchor_t oldanchor;
1200  anchor_t new_anchor;
1201  ret_t ret;
1202 
1203  TRACE(OP_MAKE_ACTIVE, desc->sz, 0, desc, desc->SB, heap, desc->Anchor.all);
1204  new_active = (ptrsize_t) (desc)+(morecredits - 1);
1205  CAS(ret, &heap->Active, NULL, new_active);
1206  if (ret) {
1207  TRACE(OP_MADE_ACTIVE, desc->sz, 0, desc, desc->SB, heap, desc->Anchor.all);
1208  return;
1209  }
1210  /*
1211  Someone installed another active sb.
1212  Return credits and make sb partial
1213  */
1214  do {
1215  aret_t aret;
1216  LLA(oldanchor.all, &desc->Anchor.all);
1217  new_anchor.all = oldanchor.all;
1218  new_anchor.sep.count += morecredits;
1219  new_anchor.sep.active = FALSE;
1220  new_anchor.sep.tag++; /* for debugging */
1221  SCA(aret, &desc->Anchor.all, oldanchor.all, new_anchor.all); /* no ABA */
1222  if (aret)
1223  break;
1224  } while (1);
1225  TRACE(OP_NOT_ACTIVE, desc->sz, 0, desc, desc->SB, heap, new_anchor.all);
1226  LWSYNC; /* wbw */
1227  WBW;
1228  partial_push(heap, desc);
1229  }
1230 
1231  /*---------------------------------------------------------------------------*/
1232  static void * malloc_from_partial(procheap_t * heap) {
1233  desc_t * desc;
1234  desc_t ** addr;
1235  anchor_t oldanchor;
1236  anchor_t new_anchor;
1237  void * sb;
1238  unsigned morecredits;
1239  unsigned sz;
1240 
1241  TRACE(OP_PARTIAL_STR, 0, 0, 0, 0, heap, 0);
1242 retry:
1243  desc = partial_pop(heap);
1244  if (!desc)
1245  return NULL;
1246  do {
1247  aret_t aret;
1248  LLA(oldanchor.all, &desc->Anchor.all);
1249  new_anchor.all = oldanchor.all;
1250  if (!oldanchor.sep.valid) {
1251  DESC_RETIRE(desc);
1252  goto retry;
1253  }
1254  morecredits = MIN(oldanchor.sep.count - 1, MAXCREDITS);
1255  new_anchor.sep.count -= morecredits + 1;
1256  new_anchor.sep.active = (morecredits > 0);
1257  new_anchor.sep.tag++; /* for debugging */
1258  SCA(aret, &desc->Anchor.all, oldanchor.all, new_anchor.all);
1259  if (aret)
1260  break;
1261  } while (1);
1262  ASSERT(!oldanchor.sep.active);
1263  ASSERT(oldanchor.sep.count > 0);
1264  ASSERT(oldanchor.sep.count < desc->maxcount);
1265  sb = desc->SB;
1266  sz = heap->sc->sz;
1267  TRACE(OP_PARTIAL_RSRV, sz, 0, desc, sb, heap, new_anchor.all);
1268  do { /* pop block */
1269  aret_t aret;
1270  LLA(oldanchor.all, &desc->Anchor.all);
1271  new_anchor.all = oldanchor.all;
1272  ASSERT(oldanchor.sep.valid);
1273  addr = sb + oldanchor.sep.head * sz + EIGHT;
1274  new_anchor.sep.head = *(unsigned *) addr;
1275  new_anchor.sep.tag++;
1276  SCA(aret, &desc->Anchor.all, oldanchor.all, new_anchor.all);
1277  if (aret)
1278  break;
1279  } while (1);
1280  TRACE(OP_PARTIAL_END, sz, addr, desc, sb, heap, new_anchor.all);
1281  if (morecredits > 0) { /* update Active */
1282  /* cannot check for new_anchor.active because someone else
1283  might have already made it active even though our morecredits
1284  is zero */
1285  ASSERT(new_anchor.sep.active);
1286  desc->Heap = heap;
1287  LWSYNC; /* wbw */
1288  WBW;
1289  make_active(heap, desc, morecredits);
1290  }
1291  return addr;
1292  }
1293 
1294  /*---------------------------------------------------------------------------*/
1295  static void * malloc_from_new__sb(procheap_t * heap) {
1296  desc_t ** addr;
1297  desc_t * desc;
1298  void * sb;
1299  ret_t ret;
1300  anchor_t new_anchor;
1301  ptrsize_t new_active;
1302  unsigned credits;
1303 
1304  desc = desc_alloc();
1305  desc->Heap = heap;
1306  sb_alloc(desc);
1307  credits = MIN(heap->sc->maxcount - 1, MAXCREDITS);
1308  new_active = (ptrsize_t) (desc)+(credits - 1);
1309  new_anchor.all = desc->Anchor.all;
1310  TRACE(OP_new__STR, desc->sz, 0, desc, desc->SB, heap, new_anchor.all);
1311  new_anchor.sep.head = 1;
1312  new_anchor.sep.count = (heap->sc->maxcount - 1) - credits;
1313  new_anchor.sep.active = TRUE;
1314  new_anchor.sep.valid = TRUE;
1315  new_anchor.sep.tag++; /* for debugging */
1316  desc->Anchor.all = new_anchor.all;
1317  desc->sz = heap->sc->sz;
1318  desc->maxcount = heap->sc->maxcount;
1319  LWSYNC; /* wbw */
1320  WBW;
1321  CAS(ret, &heap->Active, NULL, new_active);
1322  if (!ret) {
1323  sb_dealloc(desc->SB, desc->Pdesc);
1324  new_anchor.sep.valid = FALSE;
1325  desc->Anchor.all = new_anchor.all;
1326  TRACE(OP_NOT_new_, desc->sz, 0, desc, desc->SB, heap, new_anchor.all);
1327  DESC_RETIRE(desc);
1328  return NULL;
1329  }
1330  sb = desc->SB;
1331  addr = sb + EIGHT;
1332  TRACE(OP_new__END, heap->sc->sz, addr, desc, sb, heap, new_anchor.all);
1333  return addr;
1334  }
1335 
1336  /*---------------------------------------------------------------------------*/
1337  static void * malloc_regular(procheap_t * heap) {
1338  void ** addr;
1339  desc_t * desc;
1340  ptrsize_t oldactive;
1341  ptrsize_t new_active;
1342  anchor_t oldanchor;
1343  anchor_t new_anchor;
1344  void * sb;
1345  unsigned credits;
1346  unsigned morecredits = 0;
1347 
1348  TRACE(OP_MALLOC_STR, heap->sc->sz, 0, 0, 0, heap, 0);
1349  do {
1350  ret_t ret;
1351  oldactive = (ptrsize_t) (heap->Active);
1352  new_active = oldactive;
1353  if (!oldactive) {
1354  addr = malloc_from_partial(heap);
1355  if (addr) return addr;
1356  addr = malloc_from_new__sb(heap);
1357  if (addr) return addr;
1358  continue;
1359  }
1360  credits = oldactive & CREDITSMASK;
1361  if (credits == 0) {
1362  new_active = (ptrsize_t) NULL;
1363  } else {
1364  new_active--;
1365  }
1366  CAS(ret, &heap->Active, oldactive, new_active); /* no ABA */
1367  if (ret)
1368  break;
1369  } while (1);
1370  desc = (desc_t*) (oldactive - credits);
1371  sb = desc->SB;
1372  TRACE(OP_REGULAR_RSRV, desc->sz, 0, desc, sb, heap, desc->Anchor.all);
1373  do {
1374  aret_t aret;
1375  LLA(oldanchor.all, &desc->Anchor.all);
1376  new_anchor.all = oldanchor.all;
1377  ASSERT(oldanchor.sep.valid);
1378  //ASSERT(oldanchor.sep.head < desc->maxcount);
1379  //ASSERT(oldanchor.sep.count < desc->maxcount);
1380  //addr = sb+oldanchor.sep.head*sc->sz+sizeof(desc_t*);
1381  addr = sb + oldanchor.sep.head * desc->sz + EIGHT;
1382  new_anchor.sep.head = *(unsigned *) addr;
1383  if (credits == 0) {
1384  ASSERT(oldanchor.sep.active);
1385  morecredits = MIN(oldanchor.sep.count, MAXCREDITS);
1386  new_anchor.sep.count -= morecredits;
1387  new_anchor.sep.active = (morecredits > 0);
1388  }
1389  new_anchor.sep.tag++;
1390  SCA(aret, &desc->Anchor.all, oldanchor.all, new_anchor.all);
1391  if (aret)
1392  break;
1393  } while (1);
1394  TRACE(OP_REGULAR_END, desc->sz, addr, desc, sb, heap, new_anchor.all);
1395  if (morecredits > 0)
1396  make_active(heap, desc, morecredits);
1397  return addr;
1398  }
1399  /*---------------------------------------------------------------------------*/
1400 #if defined(PPC)
1401  void __free__(void * ptr)
1402 #else
1403 
1404 void free(void * ptr)
1405 #endif
1406  {
1407  desc_t ** header;
1408  desc_t * desc;
1409  anchor_t oldanchor;
1410  anchor_t new_anchor;
1411  ptrsize_t num;
1412  void * sb;
1413  procheap_t * heap;
1414  desc_t * pdesc;
1415  size_t sz;
1416 
1417  if (!ptr)
1418  return;
1419  header = ptr - EIGHT;
1420  desc = *header;
1421  num = (ptrsize_t) desc;
1422  if (num & 1) {
1423  /* Big block */
1424  num--;
1425  if (munmap((void *) header, num)) {
1426  perror("free munmap failed\n");
1427  }
1428  return;
1429  }
1430  sb = desc->SB;
1431  heap = desc->Heap;
1432  //sz = heap->sc->sz;
1433  sz = desc->sz;
1434  TRACE(OP_FREE_STR, sz, header, desc, sb, heap, desc->Anchor.all);
1435  /* Small block */
1436  do {
1437  aret_t aret;
1438  LLA(oldanchor.all, &desc->Anchor.all);
1439  new_anchor.all = oldanchor.all;
1440  ASSERT(oldanchor.sep.valid);
1441  *(unsigned *) ptr = oldanchor.sep.head;
1442  /* need fence before SC */
1443  LWSYNC; /* wbw */
1444  WBW;
1445  new_anchor.sep.head = ((ptr - sb) / sz);
1446  if ((!oldanchor.sep.active) &&
1447  //(oldanchor.sep.count == heap->sc->maxcount-1)) {
1448  (oldanchor.sep.count == desc->maxcount - 1)) {
1449  new_anchor.sep.valid = FALSE;
1450  pdesc = desc->Pdesc;
1451  if (pdesc == (void*) 1) continue; /* rbw */
1452  RBW;
1453  } else
1454  new_anchor.sep.count++;
1455  new_anchor.sep.tag++; /* for debugging */
1456  SCA(aret, &desc->Anchor.all, oldanchor.all, new_anchor.all);
1457  if (aret)
1458  break;
1459  } while (1);
1460  TRACE(OP_FREE_END, sz, header, desc, sb, heap, new_anchor.all);
1461  if (!new_anchor.sep.valid) {
1462  sb_dealloc(sb, pdesc);
1463  list_desc_remove(heap);
1464  } else if ((!new_anchor.sep.active) && (new_anchor.sep.count == 1)) {
1465  LWSYNC; /* wbw */
1466  WBW;
1467  partial_push(heap, desc);
1468  }
1469  }
1470  /*---------------------------------------------------------------------------*/
1471 #else /* 2005 */
1472 
1473 /*---------------------------------------------------------------------------*/
1474  static void make_active(procheap_t * heap, desc_t* desc) {
1475  anchor_t oldanchor;
1476  anchor_t new_anchor;
1477  ret_t ret;
1478 
1479  CAS(ret, &heap->Active, NULL, desc);
1480  if (ret)
1481  return;
1482  /*
1483  Someone installed another active sb.
1484  Return credits and make sb partial
1485  */
1486  do {
1487  aret_t aret;
1488  LLA(oldanchor.all, &desc->Anchor.all);
1489  new_anchor.all = oldanchor.all;
1490  new_anchor.sep.count++;
1491  new_anchor.sep.active = FALSE;
1492  SCA(aret, &desc->Anchor.all, oldanchor.all, new_anchor.all);
1493  if (aret)
1494  break;
1495  } while (1);
1496  LWSYNC; /* wbw */
1497  WBW;
1498  partial_push(heap, desc);
1499  }
1500 
1501  /*---------------------------------------------------------------------------*/
1502  static void * malloc_from_partial(procheap_t * heap) {
1503  desc_t * desc;
1504  desc_t ** addr;
1505  anchor_t oldanchor;
1506  anchor_t new_anchor;
1507  void * sb;
1508  unsigned sz;
1509 
1510 retry:
1511  desc = partial_pop(heap);
1512  if (!desc)
1513  return NULL;
1514  do {
1515  aret_t aret;
1516  LLA(oldanchor.all, &desc->Anchor.all);
1517  new_anchor.all = oldanchor.all;
1518  if (!oldanchor.sep.valid) {
1519  DESC_RETIRE(desc);
1520  goto retry;
1521  }
1522  ASSERT(!oldanchor.sep.active);
1523  ASSERT(oldanchor.sep.count > 0);
1524  ASSERT(oldanchor.sep.count < desc->maxcount);
1525  sb = desc->SB;
1526  sz = heap->sc->sz;
1527  addr = (desc_t**) ((char*) sb + oldanchor.sep.head * sz + EIGHT);
1528  new_anchor.sep.head = *(unsigned *) addr;
1529  if (oldanchor.sep.count > 1) {
1530  new_anchor.sep.count -= 2;
1531  new_anchor.sep.active = TRUE;
1532  } else {
1533  ASSERT(oldanchor.sep.count == 1);
1534  new_anchor.sep.count = 0;
1535  }
1536  new_anchor.sep.tag++;
1537  SCA(aret, &desc->Anchor.all, oldanchor.all, new_anchor.all);
1538  if (aret)
1539  break;
1540  } while (1);
1541  /* update Active */
1542  if (new_anchor.sep.active) {
1543  desc->Heap = heap;
1544  LWSYNC; /* wbw */
1545  WBW;
1546  make_active(heap, desc);
1547  }
1548  return addr;
1549  }
1550 
1551  /*---------------------------------------------------------------------------*/
1552  static void * malloc_from_new__sb(procheap_t * heap) {
1553  desc_t ** addr;
1554  desc_t * desc;
1555  void * sb;
1556  ret_t ret;
1557  anchor_t new_anchor;
1558 
1559  desc = desc_alloc();
1560  desc->Heap = heap;
1561  sb_alloc(desc);
1562  new_anchor.all = desc->Anchor.all;
1563  new_anchor.sep.head = 1;
1564  new_anchor.sep.count = heap->sc->maxcount - 2;
1565  new_anchor.sep.active = TRUE;
1566  new_anchor.sep.valid = TRUE;
1567  desc->Anchor.all = new_anchor.all;
1568  desc->sz = heap->sc->sz;
1569  desc->maxcount = heap->sc->maxcount;
1570  LWSYNC; /* wbw */
1571  WBW;
1572  CAS(ret, &heap->Active, NULL, desc);
1573  if (!ret) {
1574  sb_dealloc(desc->SB, desc->Pdesc);
1575  new_anchor.sep.valid = FALSE;
1576  desc->Anchor.all = new_anchor.all;
1577  DESC_RETIRE(desc);
1578  return NULL;
1579  }
1580  sb = desc->SB;
1581  addr = (desc_t**) ((char*) sb + EIGHT);
1582  return addr;
1583  }
1584 
1585  /*---------------------------------------------------------------------------*/
1586  static void * malloc_regular(procheap_t * heap) {
1587  void ** addr;
1588  desc_t * desc;
1589  anchor_t oldanchor;
1590  anchor_t new_anchor;
1591  void * sb;
1592  ret_t ret;
1593  aret_t aret;
1594 
1595  do {
1596  desc = heap->Active;
1597  if (!desc) {
1598  addr = (void**) malloc_from_partial(heap);
1599  if (addr) break;
1600  addr = (void**) malloc_from_new__sb(heap);
1601  if (addr) break;
1602  continue;
1603  }
1604  LLA(oldanchor.all, &desc->Anchor.all);
1605  new_anchor.all = oldanchor.all;
1606  if (oldanchor.sep.count == 0) {
1607  CAS(ret, &heap->Active, desc, NULL);
1608  if (!ret) continue;
1609  do { /* malloc last */
1610  LLA(oldanchor.all, &desc->Anchor.all);
1611  new_anchor.all = oldanchor.all;
1612  ASSERT(oldanchor.sep.active);
1613  /* no need for RBR */
1614  sb = desc->SB;
1615  addr = (void**) ((char*) sb + oldanchor.sep.head * desc->sz + EIGHT);
1616  new_anchor.sep.head = *(unsigned *) addr;
1617  if (oldanchor.sep.count == 0)
1618  new_anchor.sep.active = FALSE;
1619  else
1620  new_anchor.sep.count--;
1621  new_anchor.sep.tag++;
1622  SCA(aret, &desc->Anchor.all, oldanchor.all, new_anchor.all);
1623  if (aret)
1624  break;
1625  } while (1);
1626  if (new_anchor.sep.active)
1627  make_active(heap, desc);
1628  break;
1629  }
1630  /* malloc from active */
1631  ISYNC; /* rbr */
1632  RBR;
1633  sb = desc->SB;
1634  addr = (void**) ((char*) sb + oldanchor.sep.head * desc->sz + EIGHT);
1635  if (heap->Active != desc) continue;
1636  if (!sb) continue; /* rbr */
1637  ISYNC; /* rbr */
1638  RBR;
1639  if (desc->Anchor.all != oldanchor.all) continue;
1640  ASSERT(oldanchor.sep.valid); /* failed ******/
1641  ASSERT(oldanchor.sep.active);
1642  new_anchor.sep.head = *(unsigned *) addr;
1643  new_anchor.sep.count--;
1644  new_anchor.sep.tag++;
1645  SCA(aret, &desc->Anchor.all, oldanchor.all, new_anchor.all);
1646  if (aret) {
1647  break;
1648  }
1649  } while (1);
1650  return addr;
1651  }
1652  /*---------------------------------------------------------------------------*/
1653 #if defined(PPC)
1654  void __free__(void * ptr)
1655 #else
1656 
1657 void free(void * ptr)
1658 #endif
1659  {
1660  desc_t ** header;
1661  desc_t * desc;
1662  anchor_t oldanchor;
1663  anchor_t new_anchor;
1664  ptrsize_t num;
1665  void * sb;
1666  procheap_t * heap;
1667  desc_t * pdesc;
1668  unsigned sz;
1669 
1670  if (!ptr)
1671  return;
1672  header = (desc_t**) ((char*) ptr - EIGHT);
1673  desc = *header;
1674  num = (ptrsize_t) desc;
1675  if (num & 1) {
1676  /* Big block */
1677  num--;
1678  if (munmap((void *) header, num)) {
1679  perror("free munmap failed\n");
1680  }
1681  return;
1682  }
1683  sb = desc->SB;
1684  heap = desc->Heap;
1685  sz = desc->sz;
1686  // sz = heap->sc->sz;
1687  /* Small block */
1688  do {
1689  aret_t aret;
1690  LLA(oldanchor.all, &desc->Anchor.all);
1691  new_anchor.all = oldanchor.all;
1692  ASSERT(oldanchor.sep.valid);
1693  *(unsigned *) ptr = oldanchor.sep.head;
1694  /* need WBW fence before SC */
1695  LWSYNC; /* WBW */
1696  WBW;
1697  new_anchor.sep.head = (((char*) ptr - (char*) sb) / sz);
1698  if ((!oldanchor.sep.active) &&
1699  (oldanchor.sep.count == desc->maxcount - 1)) {
1700  // (oldanchor.sep.count == heap->sc->maxcount-1)) {
1701  new_anchor.sep.valid = FALSE;
1702  pdesc = desc->Pdesc;
1703  if (pdesc == (void*) 1) continue; /* RBW */
1704  RBW;
1705  } else
1706  new_anchor.sep.count++;
1707  SCA(aret, &desc->Anchor.all, oldanchor.all, new_anchor.all);
1708  if (aret)
1709  break;
1710  } while (1);
1711  if (!new_anchor.sep.valid) {
1712  sb_dealloc(sb, pdesc);
1713  list_desc_remove(heap);
1714  } else if ((!new_anchor.sep.active) && (new_anchor.sep.count == 1)) {
1715  LWSYNC; /* wbw */
1716  WBW;
1717  partial_push(heap, desc);
1718  }
1719  }
1720  /*---------------------------------------------------------------------------*/
1721 #endif /* PLDI2004 */
1722  /*---------------------------------------------------------------------------*/
1723 #if defined(PPC)
1724  void * __malloc__(size_t sz)
1725 #else
1726 
1727 void * malloc(size_t sz)
1728 #endif
1729  {
1730  procheap_t * heap;
1731  sizeclass_t * sc;
1732  void ** addr;
1733  unsigned ind;
1734  unsigned heapid;
1735 
1736 #if defined(PPC)
1737  /* __malloc_start__ is guaranteed to have been called */
1738 #else
1739  /* Check for initialization */
1740  if (controlblock == NULL) clfmalloc_init();
1741 #endif
1742 
1743  /* find sizeclass */
1744  sz += EIGHT;
1745  /* find procheap */
1746  for (ind = 0; ind < NUMSIZECLASSES; ind++) {
1747  if (sz <= scsz[ind])
1748  break;
1749  }
1750  if (ind == NUMSIZECLASSES) {
 1751  /* Block larger than the largest size class (256 KB); map it directly */
1752  if (sz & 7)
1753  sz = sz + 8 - (sz & 7);
1754  if ((addr = (void **) MMAP(sz)) == MAP_FAILED) {
1755  perror("malloc mmap failed\n");
1756  }
1757  ASSERT(addr);
1758  *addr = (void*) (sz + 1);
1759  LWSYNC; /* wbw - not needed in practice */
1760  WBW;
1761  return (void *) ((ptrsize_t) (addr) + EIGHT);
1762  }
 1763  /* Block served from a size class (at most 256 KB including the 8-byte header) */
1764  sc = &controlblock->sizeclass[ind];
1765  if (sc->numprocheaps == 1) {
1766  heap = &sc->procheap;
1767  } else {
1768  {
1769  unsigned id, x;
1770 #if defined(SPARC)
1771  id = thr_self();
1772 #else
1773  id = (unsigned long) pthread_self();
1774 #endif
1775  x = id & 0x7ff;
1776  x ^= (id >> 11) & 0x7ff;
1777  x ^= (id >> 22) & 0x7ff;
1778  // heapid = x % sc->numprocheaps;
1779  heapid = x & heapidmask;
1780  }
1781  heap = &controlblock->procheap[heapid][ind];
1782  }
1783  /* regular block */
1784  return malloc_regular(heap);
1785  }
1786 
1787  /*---------------------------------------------------------------------------*/
1788  static void * super_malloc(unsigned sz) {
1789  return malloc(sz);
1790  }
1791 
1792  /*---------------------------------------------------------------------------*/
1793  static void super_free(void * ptr) {
1794  free(ptr);
1795  }
1796  /*---------------------------------------------------------------------------*/
1797 #if defined(PPC)
1798  void * __calloc__(size_t n, size_t sz)
1799 #else
1800 
1801 void * calloc(size_t n, size_t sz)
1802 #endif
1803  {
1804  void * addr = malloc(n * sz);
1805  memset(addr, 0, n * sz);
1806  return addr;
1807  }
1808  /*---------------------------------------------------------------------------*/
1809 #if defined(PPC)
1810  void * __realloc__(void * ptr, size_t sz)
1811 #else
1812 
1813 void * realloc(void * ptr, size_t sz)
1814 #endif
1815  {
1816  desc_t ** header;
1817  size_t copysz;
1818  void * addr;
1819  desc_t * desc;
1820  ptrsize_t num;
1821 
1822  /* realloc zero frees */
1823  if (sz == 0) {
1824  free(ptr);
1825  return NULL;
1826  }
1827  /* find old size */
1828  if (!ptr)
1829  return malloc(sz);
 1830  header = (desc_t**) ((char*) ptr - EIGHT);
1831  desc = *header;
1832  num = (ptrsize_t) desc;
1833  if (num & 1) {
1834  /* Big block */
1835  num--;
1836  num -= EIGHT;
1837  addr = malloc(sz);
1838  copysz = MIN(num, sz);
1839  memcpy(addr, ptr, copysz);
1840  free(ptr);
1841  return addr;
1842  }
1843  /* regular block */
1844  copysz = desc->sz - EIGHT;
 1845  /* see if the same block can accommodate the new size */
1846  if (sz <= copysz)
1847  return ptr;
1848  /* need to reallocate */
1849  addr = malloc(sz);
1850  memcpy(addr, ptr, copysz);
1851  free(ptr);
1852  return addr;
1853  }
1854  /*---------------------------------------------------------------------------*/
1855 #if defined(PPC)
1856 #include <malloc.h>
1857 #include <errno.h>
1858 
1859 int __mallopt__(int cmd, int value) {
1860  errno = ENOSYS;
1861  perror("mallopt not supported");
1862  return 1;
1863  }
1864  /*---------------------------------------------------------------------------*/
1865  static struct mallinfo mi;
1866 
1867  struct mallinfo __mallinfo__() {
1868  errno = ENOSYS;
1869  perror("mallinfo not supported");
1870  return mi;
1871  }
1872 
1873  /*---------------------------------------------------------------------------*/
1874  int __posix_memalign__(void **p2p, size_t align, size_t sz) {
1875  errno = ENOSYS;
1876  perror("posix_memalign not supported");
1877  *p2p = NULL;
1878  return ENOSYS;
1879  }
1880 
1881  /*---------------------------------------------------------------------------*/
1882  void __malloc_init__() {
1883  }
1884 
1885  /*---------------------------------------------------------------------------*/
1886  void __malloc_prefork_lock__() {
1887  }
1888 
1889  /*---------------------------------------------------------------------------*/
1890  void __malloc_postfork_unlock__() {
1891  }
1892  /*---------------------------------------------------------------------------*/
1893 #endif
1894 
1895 
1896 
1897 #ifdef __cplusplus
1898 }
1899 #endif
1900 
1901 #endif /* CLMALLOC_H */
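A minimal smoke-test sketch of the allocator interface, assuming this header is compiled into a single translation unit (on non-PPC builds it defines malloc/free/calloc/realloc directly, and clfmalloc_init runs as a constructor); the file name and request sizes below are illustrative:

 /* test_clfmalloc.c (hypothetical) */
 #include "clfmalloc.h"

 int main(void) {
     void * p = malloc(100);      /* 100 + 8-byte header -> served from the 128-byte size class */
     void * q = calloc(4, 64);    /* zeroed; 256 + 8 -> 320-byte size class */
     q = realloc(q, 1000);        /* grows; moves to the 1024-byte size class */
     free(p);
     free(q);
     return 0;
 }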