-
-
Notifications
You must be signed in to change notification settings - Fork 700
/
Copy pathbuild.py
1497 lines (1268 loc) · 55.3 KB
/
build.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""Turn an element tree with style into a "before layout" box tree.
This includes creating anonymous boxes and processing whitespace as necessary.
"""
import re
import unicodedata
from .. import html
from ..css import properties, targets
from ..layout.table import collapse_table_borders
from ..logger import LOGGER
from ..text.constants import get_lang_quotes
from . import boxes
# Maps values of the ``display`` CSS property to box types.
# Keys are what ``make_box`` looks up, i.e. the first two items of the
# ``display`` tuple: (outer, inner) pairs for block-level and inline-level
# boxes, and 1-tuples for the internal table display types.
BOX_TYPE_FROM_DISPLAY = {
    ('block', 'flow'): boxes.BlockBox,
    ('inline', 'flow'): boxes.InlineBox,
    ('block', 'flow-root'): boxes.BlockBox,
    ('inline', 'flow-root'): boxes.InlineBlockBox,
    ('block', 'table'): boxes.TableBox,
    ('inline', 'table'): boxes.InlineTableBox,
    ('block', 'flex'): boxes.FlexBox,
    ('inline', 'flex'): boxes.InlineFlexBox,
    ('block', 'grid'): boxes.GridBox,
    ('inline', 'grid'): boxes.InlineGridBox,
    ('table-row',): boxes.TableRowBox,
    ('table-row-group',): boxes.TableRowGroupBox,
    ('table-header-group',): boxes.TableRowGroupBox,
    ('table-footer-group',): boxes.TableRowGroupBox,
    ('table-column',): boxes.TableColumnBox,
    ('table-column-group',): boxes.TableColumnGroupBox,
    ('table-cell',): boxes.TableCellBox,
    ('table-caption',): boxes.TableCaptionBox,
}

# Translation table from printable ASCII code points (0x21-0x7e) to the
# characters located 0xfee0 code points higher.
# https://stackoverflow.com/questions/16317534/
ASCII_TO_WIDE = {i: chr(i + 0xfee0) for i in range(0x21, 0x7f)}
# Space and hyphen-minus get dedicated replacements instead of the fixed
# offset used above.
ASCII_TO_WIDE.update({0x20: '\u3000', 0x2D: '\u2212'})

# Matches a carriage return optionally followed by a line feed.
LINE_FEED_RE = re.compile('\r\n?')
# Matches a line feed together with the tabs and spaces around it.
TAB_RE = re.compile('[\t ]*\n[\t ]*')
# Matches a run of one or more tabs and spaces.
SPACE_RE = re.compile('[\t ]+')
def create_anonymous_boxes(box):
    """Run every anonymous-box pass on ``box`` and return the result.

    The passes are applied in a fixed order, each one receiving the box
    returned by the previous pass.
    """
    passes = (
        anonymous_table_boxes,
        flex_boxes,
        grid_boxes,
        inline_in_block,
        block_in_inline,
    )
    for apply_pass in passes:
        box = apply_pass(box)
    return box
def build_formatting_structure(element_tree, style_for, get_image_from_uri,
                               base_url, target_collector, counter_style,
                               footnotes):
    """Return the root box of the box tree built from ``element_tree``.

    The element tree is converted with ``element_to_box``. When that yields
    no box for the root, the conversion is retried with a style getter that
    forces the root to ``display: block`` and every other element to
    ``display: none``. Pending targets are then checked, the box is flagged
    as the root element's box, anonymous boxes are created and the viewport
    overflow is set.
    """
    def convert_root(style_getter):
        return element_to_box(
            element_tree, style_getter, get_image_from_uri, base_url,
            target_collector, counter_style, footnotes)

    root_boxes = convert_root(style_for)
    if not root_boxes:
        # No root element
        def root_style_for(element, pseudo_type=None):
            style = style_for(element, pseudo_type)
            if style is None:
                return style
            is_root = element == element_tree
            style['display'] = ('block', 'flow') if is_root else ('none',)
            return style

        root_boxes = convert_root(root_style_for)

    # Exactly one root box is expected at this point.
    root_box, = root_boxes

    target_collector.check_pending_targets()

    root_box.is_for_root_element = True
    # If this is changed, maybe update weasy.layout.page.make_margin_boxes()
    root_box = create_anonymous_boxes(root_box)
    return set_viewport_overflow(root_box)
def make_box(element_tag, style, content, element):
    """Instantiate the box class matching the ``display`` value of ``style``.

    The class is looked up in ``BOX_TYPE_FROM_DISPLAY`` using the first two
    items of ``style['display']``.
    """
    box_type = BOX_TYPE_FROM_DISPLAY[style['display'][:2]]
    return box_type(element_tag, style, element, content)
def element_to_box(element, style_for, get_image_from_uri, base_url,
                   target_collector, counter_style, footnotes, state=None):
    """Convert an element and its children into a box with children.

    Return a list of boxes. Most of the time the list will have one item but
    may have zero or more than one.

    Eg.::

        <p>Some <em>emphasised</em> text.</p>

    gives (not actual syntax)::

        BlockBox[
            TextBox['Some '],
            InlineBox[
                TextBox['emphasised'],
            ],
            TextBox[' text.'],
        ]

    ``TextBox``es are anonymous inline boxes:
    See https://www.w3.org/TR/CSS21/visuren.html#anonymous

    ``style_for`` is called as ``style_for(element)`` or
    ``style_for(element, pseudo_type)`` and returns the style mapping.
    ``footnotes`` is a list that receives the boxes of children floated as
    footnotes. ``state`` is the shared ``(quote_depth, counter_values,
    counter_scopes)`` tuple; it is created here when ``None`` and passed
    down to recursive calls.

    An empty list is returned for elements whose tag is not a string and
    for elements whose ``display`` is ``none``.

    """
    if not isinstance(element.tag, str):
        # We ignore comments and XML processing instructions.
        return []

    style = style_for(element)

    # TODO: should be the used value. When does the used value for `display`
    # differ from the computed value?
    display = style['display']
    if display == ('none',):
        return []

    if style['float'] == 'footnote':
        if style['footnote_display'] == 'block':
            style['display'] = ('block', 'flow')
        else:
            # TODO: handle compact footnotes
            style['display'] = ('inline', 'flow')

    box = make_box(element.tag, style, [], element)

    if state is None:
        # use a list to have a shared mutable object
        state = (
            # Shared mutable objects:
            [0],  # quote_depth: single integer
            # TODO: define the footnote counter where it can be updated by page
            {'footnote': [0]},  # counter_values: name -> stacked/scoped values
            [{'footnote'}]  # counter_scopes: element depths -> counter names
        )
    quote_depth, counter_values, counter_scopes = state

    update_counters(state, style)

    children = []

    # If this element's direct children create new scopes, the counter
    # names will be in this new list
    counter_scopes.append(set())

    box.first_letter_style = style_for(element, 'first-letter')
    box.first_line_style = style_for(element, 'first-line')

    marker_boxes = []
    if 'list-item' in style['display']:
        marker_boxes = list(marker_to_box(
            element, state, style, style_for, get_image_from_uri,
            target_collector, counter_style))
        children.extend(marker_boxes)

    children.extend(before_after_to_box(
        element, 'before', state, style_for, get_image_from_uri,
        target_collector, counter_style))

    # collect anchor's counter_values, maybe it's a target.
    # to get the spec-conform counter_values we must do it here,
    # after the ::before is parsed and before the ::after is
    if style['anchor']:
        target_collector.store_target(style['anchor'], counter_values, box)

    text = element.text
    if text:
        children.append(boxes.TextBox.anonymous_from(box, text))

    for child_element in element:
        child_boxes = element_to_box(
            child_element, style_for, get_image_from_uri, base_url,
            target_collector, counter_style, footnotes, state)

        if child_boxes and child_boxes[0].style['float'] == 'footnote':
            # The footnote itself is moved to ``footnotes``; a footnote-call
            # box referencing it takes its place in the flow.
            footnote = child_boxes[0]
            footnote.style['float'] = 'none'
            footnotes.append(footnote)
            call_style = style_for(element, 'footnote-call')
            footnote_call = make_box(
                f'{element.tag}::footnote-call', call_style, [], element)
            footnote_call.children = content_to_boxes(
                call_style, footnote_call, quote_depth, counter_values,
                get_image_from_uri, target_collector, counter_style)
            footnote_call.footnote = footnote
            child_boxes = [footnote_call]

        children.extend(child_boxes)
        text = child_element.tail
        if text:
            text_box = boxes.TextBox.anonymous_from(box, text)
            # Merge with a directly preceding text box instead of creating
            # two adjacent ones.
            if children and isinstance(children[-1], boxes.TextBox):
                children[-1].text += text_box.text
            else:
                children.append(text_box)

    children.extend(before_after_to_box(
        element, 'after', state, style_for, get_image_from_uri,
        target_collector, counter_style))

    # Scopes created by this element's children stop here.
    for name in counter_scopes.pop():
        counter_values[name].pop()
        if not counter_values[name]:
            counter_values.pop(name)

    box.children = children
    process_whitespace(box)
    set_content_lists(
        element, box, style, counter_values, target_collector, counter_style)
    process_text_transform(box)

    if marker_boxes and len(box.children) == 1:
        # See https://www.w3.org/TR/css-lists-3/#list-style-position-outside
        #
        # "The size or contents of the marker box may affect the height of the
        # principal block box and/or the height of its first line box, and in
        # some cases may cause the creation of a new line box; this
        # interaction is also not defined."
        #
        # We decide here to add a zero-width space to have a minimum
        # height. Adding text boxes is not the best idea, but it's not a good
        # moment to add an empty line box, and the specification lets us do
        # almost what we want, so…
        if style['list_style_position'] == 'outside':
            # Written as an escape so the invisible character cannot be
            # silently lost when the file is copied or reformatted.
            box.children.append(
                boxes.TextBox.anonymous_from(box, '\u200b'))

    if style['float'] == 'footnote':
        counter_values['footnote'][-1] += 1
        marker_style = style_for(element, 'footnote-marker')
        marker = make_box(
            f'{element.tag}::footnote-marker', marker_style, [], element)
        marker.children = content_to_boxes(
            marker_style, box, quote_depth, counter_values, get_image_from_uri,
            target_collector, counter_style)
        box.children.insert(0, marker)

    # Specific handling for the element. (eg. replaced element)
    return html.handle_element(element, box, get_image_from_uri, base_url)
def before_after_to_box(element, pseudo_type, state, style_for,
                        get_image_from_uri, target_collector, counter_style):
    """Build the box list for a ``::before`` or ``::after`` pseudo-element.

    The result is an empty list when the pseudo-element has no style, is
    not displayed, or has no generated content; otherwise it is a list
    holding the single pseudo-element box.
    """
    pseudo_style = style_for(element, pseudo_type)
    if pseudo_type and pseudo_style is None:
        # Pseudo-elements with no style at all do not get a style dict.
        # Their initial content property computes to 'none'.
        return []

    # TODO: should be the computed value. When does the used value for
    # `display` differ from the computed value? It's at least wrong for
    # `content` where 'normal' computes as 'inhibit' for pseudo elements.
    display = pseudo_style['display']
    if display == ('none',):
        return []
    if pseudo_style['content'] in ('normal', 'inhibit', 'none'):
        return []

    pseudo_box = make_box(
        f'{element.tag}::{pseudo_type}', pseudo_style, [], element)

    quote_depth, counter_values, _ = state
    update_counters(state, pseudo_style)

    generated = []
    if 'list-item' in display:
        generated.extend(list(marker_to_box(
            element, state, pseudo_style, style_for, get_image_from_uri,
            target_collector, counter_style)))
    generated.extend(content_to_boxes(
        pseudo_style, pseudo_box, quote_depth, counter_values,
        get_image_from_uri, target_collector, counter_style))
    pseudo_box.children = generated

    # Compute the bookmark label when the pseudo-element has a bookmark
    # level.
    if pseudo_style['bookmark_level'] != 'none':
        compute_bookmark_label(
            element, pseudo_box, pseudo_style['bookmark_label'],
            counter_values, target_collector, counter_style)

    return [pseudo_box]
def marker_to_box(element, state, parent_style, style_for, get_image_from_uri,
                  target_collector, counter_style):
    """Yield the box for ::marker pseudo-element if there is one.

    This is a generator: it yields at most one box, and yields nothing when
    the marker style has ``display: none`` or when no marker content could
    be produced.

    ``parent_style`` is the style of the box owning the marker; its
    ``list_style_position`` and ``direction`` values decide how the marker
    box is wrapped and positioned.

    https://drafts.csswg.org/css-lists-3/#marker-pseudo

    """
    style = style_for(element, 'marker')

    # The same list object is handed to make_box() and filled in below.
    children = []

    # TODO: should be the computed value. When does the used value for
    # `display` differ from the computed value? It's at least wrong for
    # `content` where 'normal' computes as 'inhibit' for pseudo elements.
    quote_depth, counter_values, _counter_scopes = state

    box = make_box(f'{element.tag}::marker', style, children, element)

    if style['display'] == ('none',):
        return

    image_type, image = style['list_style_image']

    if style['content'] not in ('normal', 'inhibit'):
        # Explicit ``content``: use the generated content as the marker.
        children.extend(content_to_boxes(
            style, box, quote_depth, counter_values, get_image_from_uri,
            target_collector, counter_style))

    else:
        if image_type == 'url':
            # image may be None here too, in case the image is not available.
            image = get_image_from_uri(
                url=image, orientation=style['image_orientation'])
            if image is not None:
                # NOTE: ``box`` is rebound to the image box here, so the
                # anonymous wrapper created at the end is derived from it.
                box = boxes.InlineReplacedBox.anonymous_from(box, image)
                children.append(box)

        # Fall back to the list-style-type text when there is no image box.
        if not children and style['list_style_type'] != 'none':
            counter_value = counter_values.get('list-item', [0])[-1]
            counter_type = style['list_style_type']
            # TODO: rtl numbered list has the dot on the left
            if marker_text := counter_style.render_marker(counter_type, counter_value):
                # NOTE: ``box`` is rebound to the text box here as well.
                box = boxes.TextBox.anonymous_from(box, marker_text)
                box.style['white_space'] = 'pre-wrap'
                children.append(box)

    if not children:
        return

    if parent_style['list_style_position'] == 'outside':
        marker_box = boxes.BlockBox.anonymous_from(box, children)
        # We can safely edit everything that can't be changed by user style
        # See https://drafts.csswg.org/css-pseudo-4/#marker-pseudo
        marker_box.style['position'] = 'absolute'
        # Shift the marker by its own width, towards the start side.
        if parent_style['direction'] == 'ltr':
            translate_x = properties.Dimension(-100, '%')
        else:
            translate_x = properties.Dimension(100, '%')
        translate_y = properties.ZERO_PIXELS
        marker_box.style['transform'] = (
            ('translate', (translate_x, translate_y)),)
    else:
        marker_box = boxes.InlineBox.anonymous_from(box, children)
    yield marker_box
def compute_content_list(content_list, parent_box, counter_values, css_token,
                         parse_again, target_collector, counter_style,
                         get_image_from_uri=None, quote_depth=None,
                         quote_style=None, lang=None, context=None, page=None,
                         element=None):
    """Compute and return the boxes corresponding to the ``content_list``.

    ``content_list`` is iterated as ``(type, value)`` pairs; each supported
    type either appends text (merged into a trailing ``TextBox`` when there
    is one) or appends a new box to the returned list.

    ``parse_again`` is called to compute the ``content_list`` again when
    ``target_collector.lookup_target()`` detected a pending target.

    ``build_formatting_structure`` calls
    ``target_collector.check_pending_targets()`` after the first pass to do
    required reparsing.

    ``context`` and ``page`` must both be given for ``string()`` and
    ``element()`` items to be honoured; otherwise a warning is logged and
    the item is skipped. ``quote_depth`` is a one-item list that is updated
    in place by ``quote`` items. ``element`` is accepted but not read by
    this function.

    As a side effect, a copy of ``counter_values`` is stored in
    ``parent_box.cached_counter_values`` when that attribute is ``None``.

    """
    # TODO: Some computation done here may be done in computed_values
    # instead. We currently miss at least style_for, counters and quotes
    # context in computer. Some work will still need to be done here though,
    # like box creation for URIs.
    content_boxes = []

    has_text = set()  # Use a set because variable is modified in add_text

    def add_text(text):
        # Record that a text item was seen, even when the text is empty.
        has_text.add(True)
        if text:
            if content_boxes and isinstance(content_boxes[-1], boxes.TextBox):
                content_boxes[-1].text += text
            else:
                content_boxes.append(
                    boxes.TextBox.anonymous_from(parent_box, text))

    missing_counters = []
    missing_target_counters = {}
    in_page_context = context is not None and page is not None

    # Collect missing counters during build_formatting_structure.
    # Pointless to collect missing target counters in MarginBoxes.
    need_collect_missing = target_collector.collecting and not in_page_context

    if parent_box.cached_counter_values is None:
        # Store the counter_values in the parent_box to make them accessible
        # in @page context.
        parent_box.cached_counter_values = {
            key: value.copy() for key, value in counter_values.items()}

    for type_, value in content_list:
        if type_ == 'string':
            add_text(value)
        elif type_ == 'url' and get_image_from_uri is not None:
            origin, uri = value
            if origin != 'external':
                # Embedding internal references is impossible
                continue
            image = get_image_from_uri(
                url=uri, orientation=parent_box.style['image_orientation'])
            if image is not None:
                content_boxes.append(
                    boxes.InlineReplacedBox.anonymous_from(parent_box, image))
        elif type_ == 'content()':
            added_text = extract_text(value, parent_box)
            # Simulate the step of white space processing
            # (normally done during the layout)
            add_text(added_text.strip())
        elif type_ == 'string()':
            if not in_page_context:
                # string() is currently only valid in @page context
                # See https://github.com/Kozea/WeasyPrint/issues/723
                LOGGER.warning(
                    '"string(%s)" is only allowed in page margins',
                    ' '.join(value))
                continue
            add_text(context.get_string_set_for(page, *value))
        elif type_ in ('counter()', 'counters()'):
            counter_name, counter_type = value[0], value[-1]
            if counter_type == 'none':
                continue
            if need_collect_missing:
                if counter_name not in list(counter_values) + missing_counters:
                    missing_counters.append(counter_name)
            if type_ == 'counter()':
                # Only the innermost value of the counter is rendered.
                counter_value = counter_values.get(counter_name, [0])[-1]
                text = counter_style.render_value(counter_value, counter_type)
            else:
                # Every stacked value is rendered, joined by the separator.
                separator = value[1]
                text = separator.join(
                    counter_style.render_value(counter_value, counter_type)
                    for counter_value in counter_values.get(counter_name, [0]))
            add_text(text)
        elif type_ in ('target-counter()', 'target-counters()'):
            (anchor_token, counter_name), counter_type = value[:2], value[-1]
            if counter_type == 'none':
                continue
            lookup_target = target_collector.lookup_target(
                anchor_token, parent_box, css_token, parse_again)
            if lookup_target.state != 'up-to-date':
                # Stop processing the remaining items of the content list.
                break
            target_values = lookup_target.target_box.cached_counter_values
            if need_collect_missing and counter_name not in target_values:
                anchor_name = targets.anchor_name_from_token(anchor_token)
                # NOTE(review): this rebinds the function-level
                # ``missing_counters`` name to the per-anchor list, so later
                # counter() items and the final collect_missing_counters()
                # call see that list instead of the original one. Confirm
                # whether this is intended.
                missing_counters = missing_target_counters.setdefault(
                    anchor_name, [])
                if counter_name not in missing_counters:
                    missing_counters.append(counter_name)
            # Mixin target's cached page counters.
            # cached_page_counter_values are empty during layout.
            local_counters = lookup_target.cached_page_counter_values.copy()
            local_counters.update(target_values)
            if type_ == 'target-counter()':
                counter_value = local_counters.get(counter_name, [0])[-1]
                text = counter_style.render_value(counter_value, counter_type)
            else:
                separator = value[2]
                if separator[0] != 'string':
                    break
                separator_string = separator[1]
                text = separator_string.join(
                    counter_style.render_value(counter_value, counter_type)
                    for counter_value in local_counters.get(counter_name, [0]))
            add_text(text)
        elif type_ == 'target-text()':
            anchor_token, text_style = value
            lookup_target = target_collector.lookup_target(
                anchor_token, parent_box, css_token, parse_again)
            if lookup_target.state == 'up-to-date':
                target_box = lookup_target.target_box
                # TODO: 'before'- and 'after'- content referring missing
                # counters are not properly set.
                text = extract_text(text_style, target_box)
                # Simulate the step of white space processing
                # (normally done during the layout)
                add_text(text.strip())
            else:
                break
        elif type_ == 'quote' and None not in (quote_depth, quote_style):
            is_open = 'open' in value
            insert = not value.startswith('no-') and quote_style != 'none'
            # A closing quote lowers the depth before the quote is chosen,
            # an opening quote raises it afterwards.
            if not is_open:
                quote_depth[0] = max(0, quote_depth[0] - 1)
            if insert:
                if quote_style == 'auto':
                    open_quotes, close_quotes = get_lang_quotes(lang)
                else:
                    open_quotes, close_quotes = quote_style
                quotes = open_quotes if is_open else close_quotes
                # Depths beyond the available quote pairs reuse the last one.
                add_text(quotes[min(quote_depth[0], len(quotes) - 1)])
            if is_open:
                quote_depth[0] += 1
        elif type_ == 'element()':
            if not in_page_context:
                LOGGER.warning(
                    '"element(%s)" is only allowed in page margins',
                    ' '.join(value))
                continue
            new_box = context.get_running_element_for(page, *value)
            if new_box is None:
                continue
            # Work on a copy of the running element.
            new_box = new_box.deepcopy()
            new_box.style['position'] = 'static'
            if isinstance(new_box, boxes.ParentBox):
                for child in new_box.descendants():
                    if child.style['content'] in ('normal', 'none'):
                        continue
                    child.children = content_to_boxes(
                        child.style, child, quote_depth, counter_values,
                        get_image_from_uri, target_collector, counter_style,
                        context=context, page=page)
            content_boxes.append(new_box)
        elif type_ == 'leader()':
            if not value[1]:
                continue
            text_box = boxes.TextBox.anonymous_from(parent_box, value[1])
            leader_box = boxes.InlineBox.anonymous_from(
                parent_box, (text_box,))
            # Avoid breaks inside the leader box
            leader_box.style['white_space'] = 'pre'
            # Prevent whitespaces from being removed from the text box
            text_box.style['white_space'] = 'pre'
            leader_box.is_leader = True
            content_boxes.append(leader_box)

    if has_text or content_boxes:
        # Only add CounterLookupItem if the content_list actually produced text
        target_collector.collect_missing_counters(
            parent_box, css_token, parse_again, missing_counters,
            missing_target_counters)

    return content_boxes
def content_to_boxes(style, parent_box, quote_depth, counter_values,
                     get_image_from_uri, target_collector, counter_style,
                     context=None, page=None):
    """Return the boxes generated by the ``content`` value of ``style``.

    An empty list is returned when the content is ``'inhibit'`` or when no
    box is generated.
    """
    if style['content'] == 'inhibit':
        return []

    # Snapshot of the quote depth, reused each time the content is parsed
    # again.
    orig_quote_depth = quote_depth[:]

    def parse_again(mixin_pagebased_counters=None):
        """Closure to parse the ``parent_boxes`` children all again."""
        # Neither alters the mixed-in nor the cached counter values, no
        # need to deepcopy here
        local_counters = (
            {} if mixin_pagebased_counters is None
            else mixin_pagebased_counters.copy())
        local_counters.update(parent_box.cached_counter_values)

        local_children = list(content_to_boxes(
            style, parent_box, orig_quote_depth, local_counters,
            get_image_from_uri, target_collector, counter_style))

        # TODO: do we need to add markers here?
        # TODO: redo the formatting structure of the parent instead of hacking
        # the already formatted structure. Find why inline_in_blocks has
        # sometimes already been called, and sometimes not.
        current = parent_box.children
        if len(current) == 1 and isinstance(current[0], boxes.LineBox):
            current[0].children = local_children
        else:
            parent_box.children = local_children

    generated = compute_content_list(
        style['content'], parent_box, counter_values, 'content', parse_again,
        target_collector, counter_style, get_image_from_uri, quote_depth,
        style['quotes'], style['lang'], context, page)
    return generated or []
def compute_string_set(element, box, string_name, content_list,
                       counter_values, target_collector, counter_style):
    """Compute the ``string-set`` value named ``string_name`` for ``box``.

    The text of the generated text boxes is concatenated and stored as a
    ``(string_name, string)`` tuple in ``box.string_set``, replacing the
    first existing entry with the same name.
    """
    def parse_again(mixin_pagebased_counters=None):
        """Closure to parse the string-set string value all again."""
        # Neither alters the mixed-in nor the cached counter values, no
        # need to deepcopy here
        local_counters = (
            {} if mixin_pagebased_counters is None
            else mixin_pagebased_counters.copy())
        local_counters.update(box.cached_counter_values)
        compute_string_set(
            element, box, string_name, content_list, local_counters,
            target_collector, counter_style)

    generated = compute_content_list(
        content_list, box, counter_values, f'string-set::{string_name}',
        parse_again, target_collector, counter_style, element=element)
    if generated is None:
        return

    text_parts = [
        item.text for item in generated if isinstance(item, boxes.TextBox)]
    string = ''.join(text_parts)

    # Avoid duplicates, care for parse_again and missing counters, don't
    # change the pointer: the list is edited in place.
    for index, entry in enumerate(box.string_set):
        if entry[0] == string_name:
            del box.string_set[index]
            break
    box.string_set.append((string_name, string))
def compute_bookmark_label(element, box, content_list, counter_values,
                           target_collector, counter_style):
    """Parse the content-list value for ``bookmark-label``.

    The text of the generated boxes is concatenated and stored in
    ``box.bookmark_label``. Nothing is stored when no box is generated.
    """
    def parse_again(mixin_pagebased_counters=None):
        """Closure to parse the bookmark-label all again."""
        # Neither alters the mixed-in nor the cached counter values, no
        # need to deepcopy here
        if mixin_pagebased_counters is None:
            local_counters = {}
        else:
            local_counters = mixin_pagebased_counters.copy()
        local_counters.update(box.cached_counter_values)
        compute_bookmark_label(
            element, box, content_list, local_counters, target_collector,
            counter_style)

    css_token = 'bookmark-label'
    box_list = compute_content_list(
        content_list, box, counter_values, css_token, parse_again,
        target_collector, counter_style, element=element)
    if box_list:
        box.bookmark_label = ''.join(
            box_text(generated) for generated in box_list)
def set_content_lists(element, box, style, counter_values, target_collector,
                      counter_style):
    """Set the content-lists values.

    These content-lists are used in GCPM properties like ``string-set`` and
    ``bookmark-label``.

    ``box.string_set`` is always reset to an empty list first. It is then
    filled from ``style['string_set']`` unless that value is ``'none'``, and
    the bookmark label is computed unless ``style['bookmark_level']`` is
    ``'none'``.

    """
    box.string_set = []
    if style['string_set'] != 'none':
        for string_name, string_values in style['string_set']:
            compute_string_set(
                element, box, string_name, string_values, counter_values,
                target_collector, counter_style)
    if style['bookmark_level'] != 'none':
        compute_bookmark_label(
            element, box, style['bookmark_label'], counter_values,
            target_collector, counter_style)
def update_counters(state, style):
    """Apply the ``counter-reset``, ``counter-set`` and ``counter-increment``
    values of ``style`` to the counters held in ``state``.

    ``state`` is the ``(quote_depth, counter_values, counter_scopes)`` tuple;
    the counter values and the innermost scope are modified in place.
    """
    _, counter_values, counter_scopes = state
    sibling_scopes = counter_scopes[-1]

    def scoped_values(name):
        # Return the value stack of ``name``, creating the counter in the
        # current scope with value 0 when the stack is empty.
        stack = counter_values.setdefault(name, [])
        if not stack:
            assert name not in sibling_scopes
            sibling_scopes.add(name)
            stack.append(0)
        return stack

    for name, value in style['counter_reset']:
        if name not in sibling_scopes:
            sibling_scopes.add(name)
        else:
            # Already created by a sibling: replace its value.
            counter_values[name].pop()
        counter_values.setdefault(name, []).append(value)

    for name, value in style['counter_set']:
        scoped_values(name)[-1] = value

    increments = style['counter_increment']
    if increments == 'auto':
        # 'auto' is the initial value but is not valid in stylesheet:
        # there was no counter-increment declaration for this element.
        # (Or the winning value was 'initial'.)
        # https://drafts.csswg.org/css-lists-3/#declaring-a-list-item
        is_list_item = 'list-item' in style['display']
        increments = [('list-item', 1)] if is_list_item else []
    for name, amount in increments:
        scoped_values(name)[-1] += amount
def is_whitespace(box, _has_non_whitespace=re.compile('\\S').search):
    """Tell whether ``box`` is a ``TextBox`` whose text is only whitespace.

    Boxes that are not text boxes are never considered whitespace.
    """
    if not isinstance(box, boxes.TextBox):
        return False
    return not _has_non_whitespace(box.text)
def wrap_improper(box, children, wrapper_type, test=None):
    """Yield ``children``, grouping runs that fail ``test`` into wrappers.

    Children passing ``test`` are yielded unchanged. Consecutive children
    failing it are collected and yielded as a single anonymous
    ``wrapper_type`` box, built from ``box`` and processed by
    ``table_boxes_children``.

    Without an explicit ``test``, a child passes when it is an instance of
    ``wrapper_type``.

    """
    is_proper = test
    if is_proper is None:
        def is_proper(child):
            return isinstance(child, wrapper_type)

    pending = []
    for child in children:
        if not is_proper(child):
            # Whitespace either fail the test or were removed earlier,
            # so there is no need to take special care with the definition
            # of "consecutive".
            #
            # The display value of a flex item must be "blockified", see
            # https://www.w3.org/TR/css-flexbox-1/#flex-items
            # TODO: These blocks are currently ignored, we should
            # "blockify" them and their children.
            if not isinstance(box, boxes.FlexContainerBox):
                pending.append(child)
            continue
        if pending:
            wrapper = wrapper_type.anonymous_from(box, children=[])
            # Apply the rules again on the new wrapper
            yield table_boxes_children(wrapper, pending)
            pending = []
        yield child
    if pending:
        wrapper = wrapper_type.anonymous_from(box, children=[])
        # Apply the rules again on the new wrapper
        yield table_boxes_children(wrapper, pending)
def anonymous_table_boxes(box):
    """Apply the table model fix-ups to ``box`` and its descendants.

    Takes a ``Box`` object and returns a ``Box`` object. Boxes that are not
    parent boxes, and running boxes, are returned untouched.

    See https://www.w3.org/TR/CSS21/tables.html#anonymous-boxes

    """
    if isinstance(box, boxes.ParentBox) and not box.is_running():
        # Children are processed first, then the box itself.
        fixed_children = list(map(anonymous_table_boxes, box.children))
        return table_boxes_children(box, fixed_children)
    return box
def table_boxes_children(box, children):
    """Internal implementation of anonymous_table_boxes().

    ``children`` is the list of already processed children of ``box``.
    Children are first removed according to rules 1.x, then wrapped in
    anonymous boxes according to rules 2.x and 3.x. The rule numbers refer
    to https://www.w3.org/TR/CSS21/tables.html#anonymous-boxes

    Return the value of ``wrap_table()`` when ``box`` is a ``TableBox``;
    otherwise ``box`` itself, with its ``children`` attribute replaced.

    """
    if isinstance(box, boxes.TableColumnBox):  # rule 1.1
        # Remove all children.
        children = []
    elif isinstance(box, boxes.TableColumnGroupBox):  # rule 1.2
        # Remove children other than table-column.
        children = [
            child for child in children
            if isinstance(child, boxes.TableColumnBox)
        ]
        # Rule XXX (not in the spec): column groups have at least
        # one column child.
        if not children:
            # A missing span or a span below 1 counts as 1.
            if box.span is None or box.span < 1:
                span = 1
            else:
                span = box.span
            children = [boxes.TableColumnBox.anonymous_from(box, [])
                        for _ in range(span)]

    # rule 1.3
    if box.tabular_container and len(children) >= 2:
        # TODO: Maybe only remove text if internal is also
        # a proper table descendant of box.
        # This is what the spec says, but maybe not what browsers do:
        # https://lists.w3.org/Archives/Public/www-style/2011Oct/0567

        # Last child
        internal, text = children[-2:]
        if (internal.internal_table_or_caption and is_whitespace(text)):
            children.pop()

        # First child
        # The length is checked again as the pop above may have shortened
        # the list.
        if len(children) >= 2:
            text, internal = children[:2]
            if (internal.internal_table_or_caption and is_whitespace(text)):
                children.pop(0)

        # Children other than first and last that would be removed by
        # rule 1.3 are also removed by rule 1.4 below.

    # Walk the children together with their previous and next siblings
    # (``None`` at both ends).
    children = [
        child
        for prev_child, child, next_child in zip(
            [None] + children[:-1],
            children,
            children[1:] + [None]
        )
        if not (
            # Ignore some whitespace: rule 1.4
            prev_child and prev_child.internal_table_or_caption and
            next_child and next_child.internal_table_or_caption and
            is_whitespace(child)
        )
    ]

    # From here on ``children`` may be a generator returned by
    # wrap_improper(); it is only consumed at the end of the function.
    if isinstance(box, boxes.TableBox):
        # Rule 2.1
        children = wrap_improper(
            box, children, boxes.TableRowBox,
            lambda child: child.proper_table_child)
    elif isinstance(box, boxes.TableRowGroupBox):
        # Rule 2.2
        children = wrap_improper(box, children, boxes.TableRowBox)

    if isinstance(box, boxes.TableRowBox):
        # Rule 2.3
        children = wrap_improper(box, children, boxes.TableCellBox)
    else:
        # Rule 3.1
        children = wrap_improper(
            box, children, boxes.TableRowBox,
            lambda child: not isinstance(child, boxes.TableCellBox))

    # Rule 3.2
    if isinstance(box, boxes.InlineBox):
        children = wrap_improper(
            box, children, boxes.InlineTableBox,
            lambda child: not child.proper_table_child)
    else:
        parent_type = type(box)
        children = wrap_improper(
            box, children, boxes.TableBox,
            lambda child: (not child.proper_table_child or
                           parent_type in child.proper_parents))

    if isinstance(box, boxes.TableBox):
        return wrap_table(box, children)
    else:
        box.children = list(children)
        return box
def wrap_table(box, children):
"""Take a table box and return it in its table wrapper box.
Also re-order children and assign grid positions to each column and cell.
Because of colspan/rowspan works, grid_y is implicitly the index of a row,
but grid_x is an explicit attribute on cells, columns and column group.
https://www.w3.org/TR/CSS21/tables.html#model
https://www.w3.org/TR/CSS21/tables.html#table-layout
"""
# Group table children by type
columns = []
rows = []
all_captions = []
by_type = {
boxes.TableColumnBox: columns,
boxes.TableColumnGroupBox: columns,
boxes.TableRowBox: rows,
boxes.TableRowGroupBox: rows,
boxes.TableCaptionBox: all_captions,
}
for child in children:
by_type[type(child)].append(child)
# Split top and bottom captions
captions = {'top': [], 'bottom': []}
for caption in all_captions:
captions[caption.style['caption_side']].append(caption)
# Assign X positions on the grid to column boxes
column_groups = list(wrap_improper(
box, columns, boxes.TableColumnGroupBox))
grid_x = 0
for group in column_groups:
group.grid_x = grid_x
if group.children:
for column in group.children:
# There's no need to take care of group's span, as "span=x"
# already generates x TableColumnBox children
column.grid_x = grid_x
grid_x += 1
else:
grid_x += group.span
grid_width = grid_x
row_groups = wrap_improper(box, rows, boxes.TableRowGroupBox)
# Extract the optional header and footer groups.
body_row_groups = []
header = None
footer = None
for group in row_groups:
display = group.style['display']
if display == ('table-header-group',) and header is None:
group.is_header = True
header = group
elif display == ('table-footer-group',) and footer is None:
group.is_footer = True
footer = group
else:
body_row_groups.append(group)
row_groups = (
([header] if header is not None else []) +
body_row_groups +
([footer] if footer is not None else []))
# Assign a (x,y) position in the grid to each cell.
# rowspan can not extend beyond a row group, so each row group
# is independent.
# https://www.w3.org/TR/CSS21/tables.html#table-layout
# Column 0 is on the left if direction is ltr, right if rtl.
# This algorithm does not change.
grid_height = 0
for group in row_groups:
# Indexes: row number in the group.
# Values: set of cells already occupied by row-spanning cells.
occupied_cells_by_row = [set() for row in group.children]
for row in group.children:
occupied_cells_in_this_row = occupied_cells_by_row.pop(0)
# The list is now about rows after this one.
grid_x = 0
for cell in row.children:
# Make sure that the first grid cell is free.
while grid_x in occupied_cells_in_this_row:
grid_x += 1
cell.grid_x = grid_x
new_grid_x = grid_x + cell.colspan
# https://www.w3.org/TR/html401/struct/tables.html#adef-rowspan
if cell.rowspan != 1:
max_rowspan = len(occupied_cells_by_row) + 1
if cell.rowspan == 0:
# All rows until the end of the group
spanned_rows = occupied_cells_by_row
cell.rowspan = max_rowspan
else:
cell.rowspan = min(cell.rowspan, max_rowspan)
spanned_rows = occupied_cells_by_row[:cell.rowspan - 1]
spanned_columns = range(grid_x, new_grid_x)
for occupied_cells in spanned_rows:
occupied_cells.update(spanned_columns)
grid_x = new_grid_x
grid_width = max(grid_width, grid_x)
grid_height += len(group.children)
table = box.copy_with_children(row_groups)
table.style = table.style.copy()
table.column_groups = tuple(column_groups)
if table.style['border_collapse'] == 'collapse':
table.collapsed_border_grid = collapse_table_borders(
table, grid_width, grid_height)
if isinstance(box, boxes.InlineTableBox):
wrapper_type = boxes.InlineBlockBox
else:
wrapper_type = boxes.BlockBox
wrapper = wrapper_type.anonymous_from(
box, captions['top'] + [table] + captions['bottom'])
wrapper.style = wrapper.style.copy()
wrapper.is_table_wrapper = True
# Non-inherited properties of the table element apply to one
# of the wrapper and the table. The other get the initial value.
# TODO: put this in a method of the table object
for name in properties.TABLE_WRAPPER_BOX_PROPERTIES:
wrapper.style[name] = table.style[name]
table.style[name] = properties.INITIAL_VALUES[name]