Branch data Line data Source code
1 : : /* Drop in replacement for heapq.py
2 : :
3 : : C implementation derived directly from heapq.py in Py2.3
4 : : which was written by Kevin O'Connor, augmented by Tim Peters,
5 : : annotated by François Pinard, and converted to C by Raymond Hettinger.
6 : :
7 : : */
8 : :
9 : : #ifndef Py_BUILD_CORE_BUILTIN
10 : : # define Py_BUILD_CORE_MODULE 1
11 : : #endif
12 : :
13 : : #include "Python.h"
14 : : #include "pycore_list.h" // _PyList_ITEMS()
15 : :
16 : : #include "clinic/_heapqmodule.c.h"
17 : :
18 : :
19 : : /*[clinic input]
20 : : module _heapq
21 : : [clinic start generated code]*/
22 : : /*[clinic end generated code: output=da39a3ee5e6b4b0d input=d7cca0a2e4c0ceb3]*/
23 : :
24 : : static int
25 : 0 : siftdown(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos)
26 : : {
27 : : PyObject *newitem, *parent, **arr;
28 : : Py_ssize_t parentpos, size;
29 : : int cmp;
30 : :
31 : : assert(PyList_Check(heap));
32 : 0 : size = PyList_GET_SIZE(heap);
33 [ # # ]: 0 : if (pos >= size) {
34 : 0 : PyErr_SetString(PyExc_IndexError, "index out of range");
35 : 0 : return -1;
36 : : }
37 : :
38 : : /* Follow the path to the root, moving parents down until finding
39 : : a place newitem fits. */
40 : 0 : arr = _PyList_ITEMS(heap);
41 : 0 : newitem = arr[pos];
42 [ # # ]: 0 : while (pos > startpos) {
43 : 0 : parentpos = (pos - 1) >> 1;
44 : 0 : parent = arr[parentpos];
45 : 0 : Py_INCREF(newitem);
46 : 0 : Py_INCREF(parent);
47 : 0 : cmp = PyObject_RichCompareBool(newitem, parent, Py_LT);
48 : 0 : Py_DECREF(parent);
49 : 0 : Py_DECREF(newitem);
50 [ # # ]: 0 : if (cmp < 0)
51 : 0 : return -1;
52 [ # # ]: 0 : if (size != PyList_GET_SIZE(heap)) {
53 : 0 : PyErr_SetString(PyExc_RuntimeError,
54 : : "list changed size during iteration");
55 : 0 : return -1;
56 : : }
57 [ # # ]: 0 : if (cmp == 0)
58 : 0 : break;
59 : 0 : arr = _PyList_ITEMS(heap);
60 : 0 : parent = arr[parentpos];
61 : 0 : newitem = arr[pos];
62 : 0 : arr[parentpos] = newitem;
63 : 0 : arr[pos] = parent;
64 : 0 : pos = parentpos;
65 : : }
66 : 0 : return 0;
67 : : }
68 : :
69 : : static int
70 : 0 : siftup(PyListObject *heap, Py_ssize_t pos)
71 : : {
72 : : Py_ssize_t startpos, endpos, childpos, limit;
73 : : PyObject *tmp1, *tmp2, **arr;
74 : : int cmp;
75 : :
76 : : assert(PyList_Check(heap));
77 : 0 : endpos = PyList_GET_SIZE(heap);
78 : 0 : startpos = pos;
79 [ # # ]: 0 : if (pos >= endpos) {
80 : 0 : PyErr_SetString(PyExc_IndexError, "index out of range");
81 : 0 : return -1;
82 : : }
83 : :
84 : : /* Bubble up the smaller child until hitting a leaf. */
85 : 0 : arr = _PyList_ITEMS(heap);
86 : 0 : limit = endpos >> 1; /* smallest pos that has no child */
87 [ # # ]: 0 : while (pos < limit) {
88 : : /* Set childpos to index of smaller child. */
89 : 0 : childpos = 2*pos + 1; /* leftmost child position */
90 [ # # ]: 0 : if (childpos + 1 < endpos) {
91 : 0 : PyObject* a = arr[childpos];
92 : 0 : PyObject* b = arr[childpos + 1];
93 : 0 : Py_INCREF(a);
94 : 0 : Py_INCREF(b);
95 : 0 : cmp = PyObject_RichCompareBool(a, b, Py_LT);
96 : 0 : Py_DECREF(a);
97 : 0 : Py_DECREF(b);
98 [ # # ]: 0 : if (cmp < 0)
99 : 0 : return -1;
100 : 0 : childpos += ((unsigned)cmp ^ 1); /* increment when cmp==0 */
101 : 0 : arr = _PyList_ITEMS(heap); /* arr may have changed */
102 [ # # ]: 0 : if (endpos != PyList_GET_SIZE(heap)) {
103 : 0 : PyErr_SetString(PyExc_RuntimeError,
104 : : "list changed size during iteration");
105 : 0 : return -1;
106 : : }
107 : : }
108 : : /* Move the smaller child up. */
109 : 0 : tmp1 = arr[childpos];
110 : 0 : tmp2 = arr[pos];
111 : 0 : arr[childpos] = tmp2;
112 : 0 : arr[pos] = tmp1;
113 : 0 : pos = childpos;
114 : : }
115 : : /* Bubble it up to its final resting place (by sifting its parents down). */
116 : 0 : return siftdown(heap, startpos, pos);
117 : : }
118 : :
119 : : /*[clinic input]
120 : : _heapq.heappush
121 : :
122 : : heap: object(subclass_of='&PyList_Type')
123 : : item: object
124 : : /
125 : :
126 : : Push item onto heap, maintaining the heap invariant.
127 : : [clinic start generated code]*/
128 : :
129 : : static PyObject *
130 : 0 : _heapq_heappush_impl(PyObject *module, PyObject *heap, PyObject *item)
131 : : /*[clinic end generated code: output=912c094f47663935 input=7c69611f3698aceb]*/
132 : : {
133 [ # # ]: 0 : if (PyList_Append(heap, item))
134 : 0 : return NULL;
135 : :
136 [ # # ]: 0 : if (siftdown((PyListObject *)heap, 0, PyList_GET_SIZE(heap)-1))
137 : 0 : return NULL;
138 : 0 : Py_RETURN_NONE;
139 : : }
140 : :
141 : : static PyObject *
142 : 0 : heappop_internal(PyObject *heap, int siftup_func(PyListObject *, Py_ssize_t))
143 : : {
144 : : PyObject *lastelt, *returnitem;
145 : : Py_ssize_t n;
146 : :
147 : : /* raises IndexError if the heap is empty */
148 : 0 : n = PyList_GET_SIZE(heap);
149 [ # # ]: 0 : if (n == 0) {
150 : 0 : PyErr_SetString(PyExc_IndexError, "index out of range");
151 : 0 : return NULL;
152 : : }
153 : :
154 : 0 : lastelt = PyList_GET_ITEM(heap, n-1) ;
155 : 0 : Py_INCREF(lastelt);
156 [ # # ]: 0 : if (PyList_SetSlice(heap, n-1, n, NULL)) {
157 : 0 : Py_DECREF(lastelt);
158 : 0 : return NULL;
159 : : }
160 : 0 : n--;
161 : :
162 [ # # ]: 0 : if (!n)
163 : 0 : return lastelt;
164 : 0 : returnitem = PyList_GET_ITEM(heap, 0);
165 : 0 : PyList_SET_ITEM(heap, 0, lastelt);
166 [ # # ]: 0 : if (siftup_func((PyListObject *)heap, 0)) {
167 : 0 : Py_DECREF(returnitem);
168 : 0 : return NULL;
169 : : }
170 : 0 : return returnitem;
171 : : }
172 : :
173 : : /*[clinic input]
174 : : _heapq.heappop
175 : :
176 : : heap: object(subclass_of='&PyList_Type')
177 : : /
178 : :
179 : : Pop the smallest item off the heap, maintaining the heap invariant.
180 : : [clinic start generated code]*/
181 : :
182 : : static PyObject *
183 : 0 : _heapq_heappop_impl(PyObject *module, PyObject *heap)
184 : : /*[clinic end generated code: output=96dfe82d37d9af76 input=91487987a583c856]*/
185 : : {
186 : 0 : return heappop_internal(heap, siftup);
187 : : }
188 : :
189 : : static PyObject *
190 : 0 : heapreplace_internal(PyObject *heap, PyObject *item, int siftup_func(PyListObject *, Py_ssize_t))
191 : : {
192 : : PyObject *returnitem;
193 : :
194 [ # # ]: 0 : if (PyList_GET_SIZE(heap) == 0) {
195 : 0 : PyErr_SetString(PyExc_IndexError, "index out of range");
196 : 0 : return NULL;
197 : : }
198 : :
199 : 0 : returnitem = PyList_GET_ITEM(heap, 0);
200 : 0 : PyList_SET_ITEM(heap, 0, Py_NewRef(item));
201 [ # # ]: 0 : if (siftup_func((PyListObject *)heap, 0)) {
202 : 0 : Py_DECREF(returnitem);
203 : 0 : return NULL;
204 : : }
205 : 0 : return returnitem;
206 : : }
207 : :
208 : :
209 : : /*[clinic input]
210 : : _heapq.heapreplace
211 : :
212 : : heap: object(subclass_of='&PyList_Type')
213 : : item: object
214 : : /
215 : :
216 : : Pop and return the current smallest value, and add the new item.
217 : :
218 : : This is more efficient than heappop() followed by heappush(), and can be
219 : : more appropriate when using a fixed-size heap. Note that the value
220 : : returned may be larger than item! That constrains reasonable uses of
221 : : this routine unless written as part of a conditional replacement:
222 : :
223 : : if item > heap[0]:
224 : : item = heapreplace(heap, item)
225 : : [clinic start generated code]*/
226 : :
227 : : static PyObject *
228 : 0 : _heapq_heapreplace_impl(PyObject *module, PyObject *heap, PyObject *item)
229 : : /*[clinic end generated code: output=82ea55be8fbe24b4 input=719202ac02ba10c8]*/
230 : : {
231 : 0 : return heapreplace_internal(heap, item, siftup);
232 : : }
233 : :
234 : : /*[clinic input]
235 : : _heapq.heappushpop
236 : :
237 : : heap: object(subclass_of='&PyList_Type')
238 : : item: object
239 : : /
240 : :
241 : : Push item on the heap, then pop and return the smallest item from the heap.
242 : :
243 : : The combined action runs more efficiently than heappush() followed by
244 : : a separate call to heappop().
245 : : [clinic start generated code]*/
246 : :
247 : : static PyObject *
248 : 0 : _heapq_heappushpop_impl(PyObject *module, PyObject *heap, PyObject *item)
249 : : /*[clinic end generated code: output=67231dc98ed5774f input=5dc701f1eb4a4aa7]*/
250 : : {
251 : : PyObject *returnitem;
252 : : int cmp;
253 : :
254 [ # # ]: 0 : if (PyList_GET_SIZE(heap) == 0) {
255 : 0 : return Py_NewRef(item);
256 : : }
257 : :
258 : 0 : PyObject* top = PyList_GET_ITEM(heap, 0);
259 : 0 : Py_INCREF(top);
260 : 0 : cmp = PyObject_RichCompareBool(top, item, Py_LT);
261 : 0 : Py_DECREF(top);
262 [ # # ]: 0 : if (cmp < 0)
263 : 0 : return NULL;
264 [ # # ]: 0 : if (cmp == 0) {
265 : 0 : return Py_NewRef(item);
266 : : }
267 : :
268 [ # # ]: 0 : if (PyList_GET_SIZE(heap) == 0) {
269 : 0 : PyErr_SetString(PyExc_IndexError, "index out of range");
270 : 0 : return NULL;
271 : : }
272 : :
273 : 0 : returnitem = PyList_GET_ITEM(heap, 0);
274 : 0 : PyList_SET_ITEM(heap, 0, Py_NewRef(item));
275 [ # # ]: 0 : if (siftup((PyListObject *)heap, 0)) {
276 : 0 : Py_DECREF(returnitem);
277 : 0 : return NULL;
278 : : }
279 : 0 : return returnitem;
280 : : }
281 : :
282 : : static Py_ssize_t
283 : 0 : keep_top_bit(Py_ssize_t n)
284 : : {
285 : 0 : int i = 0;
286 : :
287 [ # # ]: 0 : while (n > 1) {
288 : 0 : n >>= 1;
289 : 0 : i++;
290 : : }
291 : 0 : return n << i;
292 : : }
293 : :
294 : : /* Cache friendly version of heapify()
295 : : -----------------------------------
296 : :
297 : : Build-up a heap in O(n) time by performing siftup() operations
298 : : on nodes whose children are already heaps.
299 : :
300 : : The simplest way is to sift the nodes in reverse order from
301 : : n//2-1 to 0 inclusive. The downside is that children may be
302 : : out of cache by the time their parent is reached.
303 : :
304 : : A better way is to not wait for the children to go out of cache.
305 : : Once a sibling pair of child nodes have been sifted, immediately
306 : : sift their parent node (while the children are still in cache).
307 : :
308 : : Both ways build child heaps before their parents, so both ways
309 : : do the exact same number of comparisons and produce exactly
310 : : the same heap. The only difference is that the traversal
311 : : order is optimized for cache efficiency.
312 : : */
313 : :
314 : : static PyObject *
315 : 0 : cache_friendly_heapify(PyObject *heap, int siftup_func(PyListObject *, Py_ssize_t))
316 : : {
317 : : Py_ssize_t i, j, m, mhalf, leftmost;
318 : :
319 : 0 : m = PyList_GET_SIZE(heap) >> 1; /* index of first childless node */
320 : 0 : leftmost = keep_top_bit(m + 1) - 1; /* leftmost node in row of m */
321 : 0 : mhalf = m >> 1; /* parent of first childless node */
322 : :
323 [ # # ]: 0 : for (i = leftmost - 1 ; i >= mhalf ; i--) {
324 : 0 : j = i;
325 : : while (1) {
326 [ # # ]: 0 : if (siftup_func((PyListObject *)heap, j))
327 : 0 : return NULL;
328 [ # # ]: 0 : if (!(j & 1))
329 : 0 : break;
330 : 0 : j >>= 1;
331 : : }
332 : : }
333 : :
334 [ # # ]: 0 : for (i = m - 1 ; i >= leftmost ; i--) {
335 : 0 : j = i;
336 : : while (1) {
337 [ # # ]: 0 : if (siftup_func((PyListObject *)heap, j))
338 : 0 : return NULL;
339 [ # # ]: 0 : if (!(j & 1))
340 : 0 : break;
341 : 0 : j >>= 1;
342 : : }
343 : : }
344 : 0 : Py_RETURN_NONE;
345 : : }
346 : :
347 : : static PyObject *
348 : 0 : heapify_internal(PyObject *heap, int siftup_func(PyListObject *, Py_ssize_t))
349 : : {
350 : : Py_ssize_t i, n;
351 : :
352 : : /* For heaps likely to be bigger than L1 cache, we use the cache
353 : : friendly heapify function. For smaller heaps that fit entirely
354 : : in cache, we prefer the simpler algorithm with less branching.
355 : : */
356 : 0 : n = PyList_GET_SIZE(heap);
357 [ # # ]: 0 : if (n > 2500)
358 : 0 : return cache_friendly_heapify(heap, siftup_func);
359 : :
360 : : /* Transform bottom-up. The largest index there's any point to
361 : : looking at is the largest with a child index in-range, so must
362 : : have 2*i + 1 < n, or i < (n-1)/2. If n is even = 2*j, this is
363 : : (2*j-1)/2 = j-1/2 so j-1 is the largest, which is n//2 - 1. If
364 : : n is odd = 2*j+1, this is (2*j+1-1)/2 = j so j-1 is the largest,
365 : : and that's again n//2-1.
366 : : */
367 [ # # ]: 0 : for (i = (n >> 1) - 1 ; i >= 0 ; i--)
368 [ # # ]: 0 : if (siftup_func((PyListObject *)heap, i))
369 : 0 : return NULL;
370 : 0 : Py_RETURN_NONE;
371 : : }
372 : :
373 : : /*[clinic input]
374 : : _heapq.heapify
375 : :
376 : : heap: object(subclass_of='&PyList_Type')
377 : : /
378 : :
379 : : Transform list into a heap, in-place, in O(len(heap)) time.
380 : : [clinic start generated code]*/
381 : :
382 : : static PyObject *
383 : 0 : _heapq_heapify_impl(PyObject *module, PyObject *heap)
384 : : /*[clinic end generated code: output=e63a636fcf83d6d0 input=53bb7a2166febb73]*/
385 : : {
386 : 0 : return heapify_internal(heap, siftup);
387 : : }
388 : :
389 : : static int
390 : 0 : siftdown_max(PyListObject *heap, Py_ssize_t startpos, Py_ssize_t pos)
391 : : {
392 : : PyObject *newitem, *parent, **arr;
393 : : Py_ssize_t parentpos, size;
394 : : int cmp;
395 : :
396 : : assert(PyList_Check(heap));
397 : 0 : size = PyList_GET_SIZE(heap);
398 [ # # ]: 0 : if (pos >= size) {
399 : 0 : PyErr_SetString(PyExc_IndexError, "index out of range");
400 : 0 : return -1;
401 : : }
402 : :
403 : : /* Follow the path to the root, moving parents down until finding
404 : : a place newitem fits. */
405 : 0 : arr = _PyList_ITEMS(heap);
406 : 0 : newitem = arr[pos];
407 [ # # ]: 0 : while (pos > startpos) {
408 : 0 : parentpos = (pos - 1) >> 1;
409 : 0 : parent = Py_NewRef(arr[parentpos]);
410 : 0 : Py_INCREF(newitem);
411 : 0 : cmp = PyObject_RichCompareBool(parent, newitem, Py_LT);
412 : 0 : Py_DECREF(parent);
413 : 0 : Py_DECREF(newitem);
414 [ # # ]: 0 : if (cmp < 0)
415 : 0 : return -1;
416 [ # # ]: 0 : if (size != PyList_GET_SIZE(heap)) {
417 : 0 : PyErr_SetString(PyExc_RuntimeError,
418 : : "list changed size during iteration");
419 : 0 : return -1;
420 : : }
421 [ # # ]: 0 : if (cmp == 0)
422 : 0 : break;
423 : 0 : arr = _PyList_ITEMS(heap);
424 : 0 : parent = arr[parentpos];
425 : 0 : newitem = arr[pos];
426 : 0 : arr[parentpos] = newitem;
427 : 0 : arr[pos] = parent;
428 : 0 : pos = parentpos;
429 : : }
430 : 0 : return 0;
431 : : }
432 : :
433 : : static int
434 : 0 : siftup_max(PyListObject *heap, Py_ssize_t pos)
435 : : {
436 : : Py_ssize_t startpos, endpos, childpos, limit;
437 : : PyObject *tmp1, *tmp2, **arr;
438 : : int cmp;
439 : :
440 : : assert(PyList_Check(heap));
441 : 0 : endpos = PyList_GET_SIZE(heap);
442 : 0 : startpos = pos;
443 [ # # ]: 0 : if (pos >= endpos) {
444 : 0 : PyErr_SetString(PyExc_IndexError, "index out of range");
445 : 0 : return -1;
446 : : }
447 : :
448 : : /* Bubble up the smaller child until hitting a leaf. */
449 : 0 : arr = _PyList_ITEMS(heap);
450 : 0 : limit = endpos >> 1; /* smallest pos that has no child */
451 [ # # ]: 0 : while (pos < limit) {
452 : : /* Set childpos to index of smaller child. */
453 : 0 : childpos = 2*pos + 1; /* leftmost child position */
454 [ # # ]: 0 : if (childpos + 1 < endpos) {
455 : 0 : PyObject* a = arr[childpos + 1];
456 : 0 : PyObject* b = arr[childpos];
457 : 0 : Py_INCREF(a);
458 : 0 : Py_INCREF(b);
459 : 0 : cmp = PyObject_RichCompareBool(a, b, Py_LT);
460 : 0 : Py_DECREF(a);
461 : 0 : Py_DECREF(b);
462 [ # # ]: 0 : if (cmp < 0)
463 : 0 : return -1;
464 : 0 : childpos += ((unsigned)cmp ^ 1); /* increment when cmp==0 */
465 : 0 : arr = _PyList_ITEMS(heap); /* arr may have changed */
466 [ # # ]: 0 : if (endpos != PyList_GET_SIZE(heap)) {
467 : 0 : PyErr_SetString(PyExc_RuntimeError,
468 : : "list changed size during iteration");
469 : 0 : return -1;
470 : : }
471 : : }
472 : : /* Move the smaller child up. */
473 : 0 : tmp1 = arr[childpos];
474 : 0 : tmp2 = arr[pos];
475 : 0 : arr[childpos] = tmp2;
476 : 0 : arr[pos] = tmp1;
477 : 0 : pos = childpos;
478 : : }
479 : : /* Bubble it up to its final resting place (by sifting its parents down). */
480 : 0 : return siftdown_max(heap, startpos, pos);
481 : : }
482 : :
483 : :
484 : : /*[clinic input]
485 : : _heapq._heappop_max
486 : :
487 : : heap: object(subclass_of='&PyList_Type')
488 : : /
489 : :
490 : : Maxheap variant of heappop.
491 : : [clinic start generated code]*/
492 : :
493 : : static PyObject *
494 : 0 : _heapq__heappop_max_impl(PyObject *module, PyObject *heap)
495 : : /*[clinic end generated code: output=9e77aadd4e6a8760 input=362c06e1c7484793]*/
496 : : {
497 : 0 : return heappop_internal(heap, siftup_max);
498 : : }
499 : :
500 : : /*[clinic input]
501 : : _heapq._heapreplace_max
502 : :
503 : : heap: object(subclass_of='&PyList_Type')
504 : : item: object
505 : : /
506 : :
507 : : Maxheap variant of heapreplace.
508 : : [clinic start generated code]*/
509 : :
510 : : static PyObject *
511 : 0 : _heapq__heapreplace_max_impl(PyObject *module, PyObject *heap,
512 : : PyObject *item)
513 : : /*[clinic end generated code: output=8ad7545e4a5e8adb input=f2dd27cbadb948d7]*/
514 : : {
515 : 0 : return heapreplace_internal(heap, item, siftup_max);
516 : : }
517 : :
518 : : /*[clinic input]
519 : : _heapq._heapify_max
520 : :
521 : : heap: object(subclass_of='&PyList_Type')
522 : : /
523 : :
524 : : Maxheap variant of heapify.
525 : : [clinic start generated code]*/
526 : :
527 : : static PyObject *
528 : 0 : _heapq__heapify_max_impl(PyObject *module, PyObject *heap)
529 : : /*[clinic end generated code: output=2cb028beb4a8b65e input=c1f765ee69f124b8]*/
530 : : {
531 : 0 : return heapify_internal(heap, siftup_max);
532 : : }
533 : :
534 : : static PyMethodDef heapq_methods[] = {
535 : : _HEAPQ_HEAPPUSH_METHODDEF
536 : : _HEAPQ_HEAPPUSHPOP_METHODDEF
537 : : _HEAPQ_HEAPPOP_METHODDEF
538 : : _HEAPQ_HEAPREPLACE_METHODDEF
539 : : _HEAPQ_HEAPIFY_METHODDEF
540 : : _HEAPQ__HEAPPOP_MAX_METHODDEF
541 : : _HEAPQ__HEAPIFY_MAX_METHODDEF
542 : : _HEAPQ__HEAPREPLACE_MAX_METHODDEF
543 : : {NULL, NULL} /* sentinel */
544 : : };
545 : :
546 : : PyDoc_STRVAR(module_doc,
547 : : "Heap queue algorithm (a.k.a. priority queue).\n\
548 : : \n\
549 : : Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\
550 : : all k, counting elements from 0. For the sake of comparison,\n\
551 : : non-existing elements are considered to be infinite. The interesting\n\
552 : : property of a heap is that a[0] is always its smallest element.\n\
553 : : \n\
554 : : Usage:\n\
555 : : \n\
556 : : heap = [] # creates an empty heap\n\
557 : : heappush(heap, item) # pushes a new item on the heap\n\
558 : : item = heappop(heap) # pops the smallest item from the heap\n\
559 : : item = heap[0] # smallest item on the heap without popping it\n\
560 : : heapify(x) # transforms list into a heap, in-place, in linear time\n\
561 : : item = heapreplace(heap, item) # pops and returns smallest item, and adds\n\
562 : : # new item; the heap size is unchanged\n\
563 : : \n\
564 : : Our API differs from textbook heap algorithms as follows:\n\
565 : : \n\
566 : : - We use 0-based indexing. This makes the relationship between the\n\
567 : : index for a node and the indexes for its children slightly less\n\
568 : : obvious, but is more suitable since Python uses 0-based indexing.\n\
569 : : \n\
570 : : - Our heappop() method returns the smallest item, not the largest.\n\
571 : : \n\
572 : : These two make it possible to view the heap as a regular Python list\n\
573 : : without surprises: heap[0] is the smallest item, and heap.sort()\n\
574 : : maintains the heap invariant!\n");
575 : :
576 : :
577 : : PyDoc_STRVAR(__about__,
578 : : "Heap queues\n\
579 : : \n\
580 : : [explanation by Fran\xc3\xa7ois Pinard]\n\
581 : : \n\
582 : : Heaps are arrays for which a[k] <= a[2*k+1] and a[k] <= a[2*k+2] for\n\
583 : : all k, counting elements from 0. For the sake of comparison,\n\
584 : : non-existing elements are considered to be infinite. The interesting\n\
585 : : property of a heap is that a[0] is always its smallest element.\n"
586 : : "\n\
587 : : The strange invariant above is meant to be an efficient memory\n\
588 : : representation for a tournament. The numbers below are `k', not a[k]:\n\
589 : : \n\
590 : : 0\n\
591 : : \n\
592 : : 1 2\n\
593 : : \n\
594 : : 3 4 5 6\n\
595 : : \n\
596 : : 7 8 9 10 11 12 13 14\n\
597 : : \n\
598 : : 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30\n\
599 : : \n\
600 : : \n\
601 : : In the tree above, each cell `k' is topping `2*k+1' and `2*k+2'. In\n\
602 : : a usual binary tournament we see in sports, each cell is the winner\n\
603 : : over the two cells it tops, and we can trace the winner down the tree\n\
604 : : to see all opponents s/he had. However, in many computer applications\n\
605 : : of such tournaments, we do not need to trace the history of a winner.\n\
606 : : To be more memory efficient, when a winner is promoted, we try to\n\
607 : : replace it by something else at a lower level, and the rule becomes\n\
608 : : that a cell and the two cells it tops contain three different items,\n\
609 : : but the top cell \"wins\" over the two topped cells.\n"
610 : : "\n\
611 : : If this heap invariant is protected at all time, index 0 is clearly\n\
612 : : the overall winner. The simplest algorithmic way to remove it and\n\
613 : : find the \"next\" winner is to move some loser (let's say cell 30 in the\n\
614 : : diagram above) into the 0 position, and then percolate this new 0 down\n\
615 : : the tree, exchanging values, until the invariant is re-established.\n\
616 : : This is clearly logarithmic on the total number of items in the tree.\n\
617 : : By iterating over all items, you get an O(n ln n) sort.\n"
618 : : "\n\
619 : : A nice feature of this sort is that you can efficiently insert new\n\
620 : : items while the sort is going on, provided that the inserted items are\n\
621 : : not \"better\" than the last 0'th element you extracted. This is\n\
622 : : especially useful in simulation contexts, where the tree holds all\n\
623 : : incoming events, and the \"win\" condition means the smallest scheduled\n\
624 : : time. When an event schedule other events for execution, they are\n\
625 : : scheduled into the future, so they can easily go into the heap. So, a\n\
626 : : heap is a good structure for implementing schedulers (this is what I\n\
627 : : used for my MIDI sequencer :-).\n"
628 : : "\n\
629 : : Various structures for implementing schedulers have been extensively\n\
630 : : studied, and heaps are good for this, as they are reasonably speedy,\n\
631 : : the speed is almost constant, and the worst case is not much different\n\
632 : : than the average case. However, there are other representations which\n\
633 : : are more efficient overall, yet the worst cases might be terrible.\n"
634 : : "\n\
635 : : Heaps are also very useful in big disk sorts. You most probably all\n\
636 : : know that a big sort implies producing \"runs\" (which are pre-sorted\n\
637 : : sequences, which size is usually related to the amount of CPU memory),\n\
638 : : followed by a merging passes for these runs, which merging is often\n\
639 : : very cleverly organised[1]. It is very important that the initial\n\
640 : : sort produces the longest runs possible. Tournaments are a good way\n\
641 : : to that. If, using all the memory available to hold a tournament, you\n\
642 : : replace and percolate items that happen to fit the current run, you'll\n\
643 : : produce runs which are twice the size of the memory for random input,\n\
644 : : and much better for input fuzzily ordered.\n"
645 : : "\n\
646 : : Moreover, if you output the 0'th item on disk and get an input which\n\
647 : : may not fit in the current tournament (because the value \"wins\" over\n\
648 : : the last output value), it cannot fit in the heap, so the size of the\n\
649 : : heap decreases. The freed memory could be cleverly reused immediately\n\
650 : : for progressively building a second heap, which grows at exactly the\n\
651 : : same rate the first heap is melting. When the first heap completely\n\
652 : : vanishes, you switch heaps and start a new run. Clever and quite\n\
653 : : effective!\n\
654 : : \n\
655 : : In a word, heaps are useful memory structures to know. I use them in\n\
656 : : a few applications, and I think it is good to keep a `heap' module\n\
657 : : around. :-)\n"
658 : : "\n\
659 : : --------------------\n\
660 : : [1] The disk balancing algorithms which are current, nowadays, are\n\
661 : : more annoying than clever, and this is a consequence of the seeking\n\
662 : : capabilities of the disks. On devices which cannot seek, like big\n\
663 : : tape drives, the story was quite different, and one had to be very\n\
664 : : clever to ensure (far in advance) that each tape movement will be the\n\
665 : : most effective possible (that is, will best participate at\n\
666 : : \"progressing\" the merge). Some tapes were even able to read\n\
667 : : backwards, and this was also used to avoid the rewinding time.\n\
668 : : Believe me, real good tape sorts were quite spectacular to watch!\n\
669 : : From all times, sorting has always been a Great Art! :-)\n");
670 : :
671 : :
672 : : static int
673 : 3 : heapq_exec(PyObject *m)
674 : : {
675 : 3 : PyObject *about = PyUnicode_FromString(__about__);
676 [ - + ]: 3 : if (PyModule_AddObject(m, "__about__", about) < 0) {
677 : 0 : Py_DECREF(about);
678 : 0 : return -1;
679 : : }
680 : 3 : return 0;
681 : : }
682 : :
683 : : static struct PyModuleDef_Slot heapq_slots[] = {
684 : : {Py_mod_exec, heapq_exec},
685 : : {0, NULL}
686 : : };
687 : :
688 : : static struct PyModuleDef _heapqmodule = {
689 : : PyModuleDef_HEAD_INIT,
690 : : "_heapq",
691 : : module_doc,
692 : : 0,
693 : : heapq_methods,
694 : : heapq_slots,
695 : : NULL,
696 : : NULL,
697 : : NULL
698 : : };
699 : :
700 : : PyMODINIT_FUNC
701 : 3 : PyInit__heapq(void)
702 : : {
703 : 3 : return PyModuleDef_Init(&_heapqmodule);
704 : : }
|