-
Notifications
You must be signed in to change notification settings - Fork 9
/
tagpdf-struct.dtx
2297 lines (2266 loc) · 77.6 KB
/
tagpdf-struct.dtx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
% \iffalse meta-comment
%
%% File: tagpdf-struct.dtx
%
% Copyright (C) 2019-2024 Ulrike Fischer
%
% It may be distributed and/or modified under the conditions of the
% LaTeX Project Public License (LPPL), either version 1.3c of this
% license or (at your option) any later version. The latest version
% of this license is in the file
%
% https://www.latex-project.org/lppl.txt
%
% This file is part of the "tagpdf bundle" (The Work in LPPL)
% and all files in that bundle must be distributed together.
%
% -----------------------------------------------------------------------
%
% The development version of the bundle can be found at
%
% https://github.com/latex3/tagpdf
%
% for those people who are interested.
%<*driver>
%% Documentation driver: typesets this file with the l3doc class.
%% The PDF title names the module documented in this file (tagpdf-struct).
\DocumentMetadata{}
\documentclass{l3doc}
\usepackage{array,booktabs,caption}
\hypersetup{pdfauthor=Ulrike Fischer,
pdftitle=tagpdf-struct module (tagpdf)}
\input{tagpdf-docelements}
\begin{document}
\DocInput{\jobname.dtx}
\end{document}
%</driver>
% \fi
% \title{^^A
% The \pkg{tagpdf-struct} module\\ Commands to create the structure ^^A
% \\ Part of the tagpdf package
% }
%
% \author{^^A
% Ulrike Fischer\thanks
% {^^A
% E-mail:
% \href{mailto:fischer@troubleshooting-tex.de}
% {fischer@troubleshooting-tex.de}^^A
% }^^A
% }
%
% \date{Version 0.99k, released 2024-12-04}
% \maketitle
% \begin{documentation}
% \section{Public Commands}
% \begin{function}{\tag_struct_begin:n,\tag_struct_end:,\tag_struct_end:n}
% \begin{syntax}
% \cs{tag_struct_begin:n}\Arg{key-values}\\
% \cs{tag_struct_end:}\\
% \cs{tag_struct_end:n}\Arg{tag}
% \end{syntax}
% These commands start and end a new structure.
% They don't start a group. They set all their values globally.
% \cs{tag_struct_end:n} normally does nothing special (apart from
% swallowing its argument), but if \texttt{tagpdf-debug} is loaded,
% it will check if the \Arg{tag} (after expansion)
% is identical to the current structure on the stack. The tag is not role mapped!
% \end{function}
% \begin{function}{\tag_struct_use:n,\tag_struct_use_num:n}
% \begin{syntax}
% \cs{tag_struct_use:n}\Arg{label}\\
% \cs{tag_struct_use_num:n}\Arg{structure number}
% \end{syntax}
% These commands insert a structure previously stashed away as kid
% into the currently active structure.
% A structure should be used only once,
% if the structure already has a parent a warning is issued.
% \end{function}
% \begin{function}{\tag_struct_object_ref:n,\tag_struct_object_ref:e}
% \begin{syntax}
% \cs{tag_struct_object_ref:n}\Arg{struct number}
% \end{syntax}
% This is a small wrapper around |\pdf_object_ref:n| to retrieve the
% object reference of the structure with the number \meta{struct number}.
% This number can be retrieved and stored for the current structure
% for example with \cs{tag_get:n}\Arg{struct_num}. Be aware that it can only
% be used if the structure has already been created and that it doesn't check
% if the object actually exists!
% \end{function}
%
% The following two functions are used to add annotations. They must be used
% together and with care to get the same numbers. Perhaps some improvements are needed
% here.
% \begin{function}{\tag_struct_insert_annot:nn}
% \begin{syntax}
% \cs{tag_struct_insert_annot:nn}\Arg{object reference}\Arg{struct parent number}
% \end{syntax}
% This inserts an annotation in the structure. \meta{object reference}
% is the reference to the annotation. \meta{struct parent number}
% should be the same number as had been inserted with \cs{tag_struct_parent_int:}
% as |StructParent| value to the dictionary of the annotation.
% The command will increase the value of the counter
% used by \cs{tag_struct_parent_int:}.
% \end{function}
% \begin{function}{\tag_struct_parent_int:}
% \begin{syntax}
% \cs{tag_struct_parent_int:}
% \end{syntax}
% This gives back the next free /StructParent number (assuming that it is used
% together with \cs{tag_struct_insert_annot:nn}, which will increase the number).
% \end{function}
%
% \begin{function}{\tag_struct_gput:nnn}
% \begin{syntax}
% \cs{tag_struct_gput:nnn}\Arg{structure number}\Arg{keyword}\Arg{value}
% \end{syntax}
% This is a command that allows to update the data of a structure.
% This often can't be done simply by replacing the value, as we have to
% preserve and extend existing content. We use therefore dedicated functions
% adjusted to the key in question.
% The first argument is the number of the structure,
% the second a keyword referring to a function,
% the third the value. Currently the only keyword is \texttt{ref}, which updates
% the Ref key (an array).
% \end{function}
%
% \begin{function}{\tag_struct_gput_ref:nnn}
% \begin{syntax}
% \cs{tag_struct_gput_ref:nnn}\Arg{structure number}\Arg{keyword}\Arg{value}
% \end{syntax}
% This is a user interface to add a Ref key to an
% existing structure. The target structure doesn't have to exist yet
% but can be addressed by label, destname or even num.
% \meta{keyword} is currently either \texttt{label}, \texttt{dest}
% or \texttt{num}. The value is then either a label name, the name of a destination
% or a structure number.
% \end{function}
%
% \section{Public keys}
% \subsection{Keys for the structure commands}
% \begin{structkeydecl}{tag}
% This is required. The value of the key is normally one of the
% standard types listed in the main tagpdf documentation.
% It is possible to setup new tags/types.
% The value can also be of the form |type/NS|, where |NS| is the
% shorthand of a declared name space.
% Currently the name spaces |pdf|, |pdf2|, |mathml| and |user| are defined.
% This allows to use a different name space than
% the one connected by default to the tag. But normally this should not be needed.
% \end{structkeydecl}
% \begin{structkeydecl}{stash}
% Normally a new structure inserts itself as a kid
% into the currently active structure. This key prohibits this.
% The structure is nevertheless from now on
% \enquote{the current active structure}
% and parent for following marked content and structures.
% \end{structkeydecl}
% \begin{structkeydecl}{label}
% This key sets a label by which
% one can refer to the structure. It is e.g.
% used by \cs{tag_struct_use:n} (where a real label is actually not
% needed as you can only use structures already defined), and by the
% |ref| key (which can refer to future structures).
% Internally the label name will start with \texttt{tagpdfstruct-} and it stores
% the two attributes |tagstruct| (the structure number) and |tagstructobj| (the
% object reference).
% \end{structkeydecl}
% \begin{structkeydecl}{parent}
% By default a structure is added as kid to the currently active structure.
% With the parent key one can choose another parent. The value is a structure number which
% must refer to an already existing, previously created structure. Such a structure
% number can for example have been stored with \cs{tag_get:n}, but one can also use
% a label on the parent structure and then use
% \cs{property_ref:nn}|{tagpdfstruct-label}{tagstruct}| to retrieve it.
% \end{structkeydecl}
% \begin{structkeydecl}{firstkey}
% If this key is used the structure is added at the left of the kids of
% the parent structure (if the structure is not stashed).
% This means that it will be the first kid of the structure
% (unless some later structure uses the key too).
% \end{structkeydecl}
% \begin{structkeydecl}{title,title-o}
% These keys allow to set the dictionary entry
% \texttt{/Title} in the structure object.
% The value is handled as verbatim string and hex encoded.
% Commands are not expanded. |title-o| will expand the value once.
% \end{structkeydecl}
%
% \begin{structkeydecl}{alt}
% This key inserts an \texttt{/Alt} value in the dictionary of structure object.
% The value is handled as verbatim string and hex encoded.
% The value will be expanded first once. If it is empty, nothing will happen.
% \end{structkeydecl}
% \begin{structkeydecl}{actualtext}
% This key inserts an \texttt{/ActualText} value in the dictionary of structure object.
% The value is handled as verbatim string and hex encoded.
% The value will be expanded first once. If it is empty, nothing will happen.
% \end{structkeydecl}
% \begin{structkeydecl}{lang}
% This key allows to set the language for a structure element. The value should be a bcp-identifier,
% e.g. |de-De|.
% \end{structkeydecl}
% \begin{structkeydecl}{ref}
% This key allows to add references to other structure elements,
% it adds the |/Ref| array to the structure.
% The value should be a comma separated list of structure labels
% set with the |label| key. e.g. |ref={label1,label2}|.
% \end{structkeydecl}
% \begin{structkeydecl}{E}
% This key sets the |/E| key, the expanded form of an
% abbreviation or an acronym
% (I couldn't think of a better name, so I stuck with E).
% \end{structkeydecl}
% \begin{structkeydecl}{AF,AFref,
% AFinline,AFinline-o,texsource,mathml}
% These keys handle associated files in the structure element.
%
% \begin{syntax}
% AF = \meta{object name}\\
% AFref = \meta{object reference}\\
% AFinline = \meta{text content}\\
% \end{syntax}
%
% The value \meta{object name} should be the name of an object pointing
% to the \texttt{/Filespec} dictionary as expected by
% |\pdf_object_ref:n| from a current \texttt{l3kernel}.
%
% The value of |AFinline| is some text,
% which is embedded in the PDF as a text file with mime type text/plain.
% |AFinline-o| is like |AFinline| but expands the value once.
%
% Future versions will perhaps extend this to more mime types, but it is
% still a research task to find out what is really needed.
%
% |texsource| is a special variant of |AFinline-o| which embeds the content
% as |.tex| source with the |/AFrelationship| key set to |/Source|.
% It also sets the |/Desc| key to a (currently) fixed text.
%
% |mathml| is a special variant of |AFinline-o| which embeds the content
% as |.xml| file with the |/AFrelationship| key set to |/Supplement|.
% It also sets the |/Desc| key to a (currently) fixed text.
%
% The argument of |AF| is an object name referring to an embedded file as declared
% for example with
% \cs{pdf_object_new:n} or with the l3pdffile module. |AF| expands its argument
% (this allows e.g. to use some variable for automatic numbering)
% and can be used more than once, to associate more than one file.
%
% The argument of |AFref| is an object reference to an embedded file
% or a variable expanding to such an object reference in the format
% as you would get e.g. from \cs{pdf_object_ref_last:} or \cs{pdf_object_ref:n}
% (and which is different for the various engines!). The key allows to make
% use of anonymous objects. Like |AF| the |AFref| key expands its argument
% and can be used more than once, to associate more than one file. \emph{It
% does not check if the reference is valid!}
%
% The inline keys can be used only once per structure. Additional calls are ignored.
% \end{structkeydecl}
%
% \begin{structkeydecl}{attribute}
% This key takes as argument a comma list of attribute names
% (use braces to protect the commas from the external key-val parser)
% and allows to add one or more attribute dictionary entries in
% the structure object. As an example
% \begin{verbatim}
% \tagstructbegin{tag=TH,attribute= TH-row}
% \end{verbatim}
% Attribute names and their content must be declared first in \cs{tagpdfsetup}.
%
% \end{structkeydecl}
%
% \begin{structkeydecl}{attribute-class}
% This key takes as argument a comma list of attribute class names
% (use braces to protect the commas from the external key-val parser)
% and allows to add one or more attribute classes to the structure object.
%
% Attribute class names and their content
% must be declared first in \cs{tagpdfsetup}.
% \end{structkeydecl}
%
% \subsection{Setup keys}
% \begin{function}{role/new-attribute (setup-key), newattribute (deprecated)}
% \begin{syntax}
% role/new-attribute = \Arg{name}\Arg{Content}
% \end{syntax}
% This key can be used in the setup command \cs{tagpdfsetup} and allows to declare a
% new attribute, which can be used as attribute or attribute class.
% The value consists of two brace groups, the first contains the name, the second the content.
% \begin{verbatim}
% \tagpdfsetup
% {
% role/new-attribute =
% {TH-col}{/O /Table /Scope /Column},
% role/new-attribute =
% {TH-row}{/O /Table /Scope /Row},
% }
% \end{verbatim}
%
% \end{function}
% \begin{setupkeydecl}{root-AF}
% \begin{syntax}
% root-AF = \meta{object name}
% \end{syntax}
% This key can be used in the setup command \cs{tagpdfsetup} and allows
% to add associated files to the root structure. Like |AF| it can be used more than
% once to add more than one file.
% \end{setupkeydecl}
% \end{documentation}
% \begin{implementation}
% \begin{macrocode}
%<@@=tag>
%<*header>
%% Package identification: name, release date, version and short description.
\ProvidesExplPackage {tagpdf-struct-code} {2024-12-04} {0.99k}
{part of tagpdf - code related to storing structure}
%</header>
% \end{macrocode}
% \section{Variables}
% \begin{variable}{\c@g_@@_struct_abs_int}
% Every structure will have a unique, absolute number.
% I will use a \LaTeX{} counter for the structure count
% to have a chance to avoid double structures in align etc.
% The counter is created under the \texttt{base} guard and is set to 1,
% the number used for the StructTreeRoot.
%
% \begin{macrocode}
%<base>\newcounter { g_@@_struct_abs_int }
%<base>\int_gset:Nn \c@g_@@_struct_abs_int { 1 }
% \end{macrocode}
% \end{variable}
%
% \begin{variable}{\g_@@_struct_objR_seq}
% A sequence to store the mapping between the
% structure number and the object number.
% We assume that structure numbers are assigned
% consecutively and so the index of the seq can be used.
% A seq allows easy mapping over the structures.
% \begin{macrocode}
%<*package>
\@@_seq_new:N \g_@@_struct_objR_seq
% \end{macrocode}
% \end{variable}
% \begin{variable}{\c_@@_struct_null_tl}
% In lua mode we have to test if the kids are null.
% This constant holds the text |null|.
% \begin{macrocode}
\tl_const:Nn\c_@@_struct_null_tl {null}
% \end{macrocode}
% \end{variable}
% \begin{variable}{\g_@@_struct_cont_mc_prop}
% In generic mode it can happen after
% a page break that we have to inject an additional mc into the
% kid sequence of a structure, after an existing one. We will store this additional
% info in a property. The key is the absolute mc number, the value is
% presumably the PDF dictionary to insert (the original text said
% \enquote{pdf directory}; TODO confirm).
% \begin{macrocode}
\@@_prop_new:N \g_@@_struct_cont_mc_prop
% \end{macrocode}
% \end{variable}
%
% \begin{variable}{\g_@@_struct_stack_seq}
% A stack sequence for the structure stack.
% When a structure is opened its number is put on the stack.
% The stack is initialized with the number 1.
% \begin{macrocode}
\seq_new:N \g_@@_struct_stack_seq
\seq_gpush:Nn \g_@@_struct_stack_seq {1}
% \end{macrocode}
% \end{variable}
%
% \begin{variable}{\g_@@_struct_tag_stack_seq}
% We will perhaps also need the tags. While it is possible to get them from the
% numbered stack, let's build a tag stack too.
% Its first entry consists of the two brace groups |{Root}{StructTreeRoot}|.
% \begin{macrocode}
\seq_new:N \g_@@_struct_tag_stack_seq
\seq_gpush:Nn \g_@@_struct_tag_stack_seq {{Root}{StructTreeRoot}}
% \end{macrocode}
% \end{variable}
%
%
% \begin{variable}{\g_@@_struct_stack_current_tl,\l_@@_struct_stack_parent_tmpa_tl}
% The global variable will hold the current structure number. It is already
% defined in \texttt{tagpdf-base}, where it is set (unexpanded) to the
% value of the structure counter.
% The local temporary variable will hold the parent when we fetch it from the stack.
% \begin{macrocode}
%</package>
%<base>\tl_new:N \g_@@_struct_stack_current_tl
%<base>\tl_gset:Nn \g_@@_struct_stack_current_tl {\int_use:N\c@g_@@_struct_abs_int}
%<*package>
\tl_new:N \l_@@_struct_stack_parent_tmpa_tl
% \end{macrocode}
% \end{variable}
%
% I will need at least one structure: the StructTreeRoot
% normally it should have only one kid, e.g. the document element.
% The data of the StructTreeRoot and the StructElem are in properties:
% |\g_@@_struct_1_prop| for the root and
% |\g_@@_struct_N_prop|, $N \geq 2$, for the others.
%
% This creates quite a number of properties, so perhaps we will have to
% do this more efficiently in the future.
%
% All properties have at least the keys
% \begin{description}
% \item[Type] StructTreeRoot or StructElem
% \end{description}
% and the keys from the two following lists
% (the root has a special set of properties).
% The values of the prop should already be escaped properly
% when the entries are created (title, lang, alt, E, actualtext).
% \begin{variable}
% {
% \c_@@_struct_StructTreeRoot_entries_seq,
% \c_@@_struct_StructElem_entries_seq
% }
% These two constant sequences list the dictionary keys we support in the
% StructTreeRoot object and in the StructElem objects.
% They are currently no longer used, but are provided as documentation and
% for potential future checks.
% They should be adapted if there are changes in the PDF format.
% \begin{macrocode}
\seq_const_from_clist:Nn \c_@@_struct_StructTreeRoot_entries_seq
  { % p. 857/858
    Type,              % always /StructTreeRoot
    K,                 % kid, dictionary or array of dictionaries
    IDTree,            % currently unused
    ParentTree,        % required, obj ref to the parent tree
    ParentTreeNextKey, % optional
    RoleMap,
    ClassMap,
    Namespaces,
    AF                 % pdf 2.0
  }
\seq_const_from_clist:Nn \c_@@_struct_StructElem_entries_seq
  { % p. 858 f
    Type,              % always /StructElem
    S,                 % tag/type
    P,                 % parent
    ID,                % optional
    Ref,               % optional, pdf 2.0 Use?
    Pg,                % obj num of starting page, optional
    K,                 % kids
    A,                 % attributes, probably unused
    C,                 % class, ditto
    % R,               % attribute revision number, irrelevant for us as we
    %                  % don't update/change existing PDF and (probably)
    %                  % deprecated in PDF 2.0
    T,                 % title, value in () or <>
    Lang,              % language
    Alt,               % value in () or <>
    E,                 % abbreviation
    ActualText,
    AF,                % pdf 2.0, array of dict, associated files
    NS,                % pdf 2.0, dict, namespace
    PhoneticAlphabet,  % pdf 2.0
    Phoneme            % pdf 2.0
  }
% \end{macrocode}
% \end{variable}
%
% \subsection{Variables used by the keys}
% \begin{variable}{\g_@@_struct_tag_tl,\g_@@_struct_tag_NS_tl,
% \l_@@_struct_roletag_tl,\l_@@_struct_roletag_NS_tl}
% Used by the tag key to store the tag and the namespace.
% The two (local) role tag variables will hold the rolemapping info needed
% for the parent-child checks.
% \begin{macrocode}
\tl_new:N \g_@@_struct_tag_tl
\tl_new:N \g_@@_struct_tag_NS_tl
\tl_new:N \l_@@_struct_roletag_tl
\tl_new:N \l_@@_struct_roletag_NS_tl
% \end{macrocode}
% \end{variable}
% \begin{variable}{\g_@@_struct_label_num_prop}
% This will hold for every structure label the associated
% structure number. The prop allows to
% fill the /Ref key directly in the first compilation if the |ref|
% key is used.
% \begin{macrocode}
\prop_new_linked:N \g_@@_struct_label_num_prop
% \end{macrocode}
% \end{variable}
% \begin{variable}{\l_@@_struct_elem_stash_bool}
% This boolean keeps track of the stash status.
% \begin{macrocode}
\bool_new:N \l_@@_struct_elem_stash_bool
% \end{macrocode}
% \end{variable}
%
% \begin{variable}{\l_@@_struct_addkid_tl}
% This decides if a structure kid is added at the left or at the right
% of the kids of the parent.
% The default is \texttt{right}.
% \begin{macrocode}
\tl_new:N \l_@@_struct_addkid_tl
\tl_set:Nn \l_@@_struct_addkid_tl {right}
% \end{macrocode}
% \end{variable}
% \subsection{Variables used by tagging code of basic elements}
%
% \begin{variable}{\g_@@_struct_dest_num_prop}
% This variable records for (some or all, not clear yet)
% destination names the related structure number, so that
% they can be referenced in a Ref. The key is the destination.
% It is currently used by the toc-tagging and sec-tagging code.
% The variable is declared under the \texttt{base} guard.
% \begin{macrocode}
%</package>
%<base>\prop_new_linked:N \g_@@_struct_dest_num_prop
%<*package>
% \end{macrocode}
% \end{variable}
%
% \begin{variable}{\g_@@_struct_ref_by_dest_prop}
% This variable contains structures whose Ref key should be updated
% at the end to point to structures related to this destination.
% As this is probably needed in other places too, it is not only a toc-variable.
% TODO: remove after 11/2024 release.
% \begin{macrocode}
\prop_new_linked:N \g_@@_struct_ref_by_dest_prop
% \end{macrocode}
% \end{variable}
%
% \section{Commands}
%
% The properties must be in some places handled expandably.
% So I need an output handler for each prop, to get expandable output
% see \url{https://tex.stackexchange.com/questions/424208}.
% There is probably room here for a more efficient implementation.
% TODO check if this can now be implemented with the pdfdict commands.
% The property contains currently non pdf keys, but e.g. object numbers are
% perhaps no longer needed as we have named object anyway.
%
% \begin{macro}{\@@_struct_output_prop_aux:nn,\@@_new_output_prop_handler:n}
% The auxiliary takes a structure number and a key. If the key is present
% in the property list of this structure, it expands to a space,
% a slash followed by the key, a space and the stored value;
% otherwise it expands to nothing.
% The second command creates for a given structure number a one-argument
% output function which calls the auxiliary with this number and its
% argument as key.
% \begin{macrocode}
\cs_new:Npn \@@_struct_output_prop_aux:nn #1 #2 % #1 num, #2 key
  {
    \prop_if_in:cnT { g_@@_struct_#1_prop } { #2 }
      {
        \c_space_tl / #2 ~
        \prop_item:cn { g_@@_struct_#1_prop } { #2 }
      }
  }
\cs_new_protected:Npn \@@_new_output_prop_handler:n #1
  {
    \cs_new:cn { @@_struct_output_prop_#1:n }
      { \@@_struct_output_prop_aux:nn {#1} {##1} }
  }
%</package>
% \end{macrocode}
% \end{macro}
%
% \begin{macro}{\@@_struct_prop_gput:nnn}
% The structure props must be filled in various places.
% For this we use a common command which also takes care of the debug package:
% in the debug variant the key and value are additionally stored in a
% second, debug property list of the structure.
% \begin{macrocode}
%<*package|debug>
%<package>\cs_new_protected:Npn \@@_struct_prop_gput:nnn #1 #2 #3
%<debug>\cs_set_protected:Npn \@@_struct_prop_gput:nnn #1 #2 #3
%% #1 structure number, #2 key, #3 value
{
\@@_prop_gput:cnn
{ g_@@_struct_#1_prop }{#2}{#3}
%<debug>\prop_gput:cnn { g_@@_struct_debug_#1_prop } {#2} {#3}
}
\cs_generate_variant:Nn \@@_struct_prop_gput:nnn {onn,nne,nee,nno}
%</package|debug>
% \end{macrocode}
% \end{macro}
% \subsection{Initialization of the StructTreeRoot}
% The first structure element, the StructTreeRoot is special, so
% created manually. The underlying object is |@@/struct/1| which is currently
% created in the tree code (TODO move it here).
% The |ParentTree| and |RoleMap| entries are added at begin document
% in the tree code as they refer to objects which are set up in other parts of the
% code. This avoids timing issues.
%
% The current structure is set to 1, the number of the root.
% \begin{macrocode}
%<*package>
\tl_gset:Nn \g_@@_struct_stack_current_tl {1}
% \end{macrocode}
% \begin{macro}{\@@_pdf_name_e:n}
% A short expandable wrapper which passes its argument to
% \cs{pdf_name_from_unicode_e:n}.
% \begin{macrocode}
\cs_new:Npn \@@_pdf_name_e:n #1{\pdf_name_from_unicode_e:n{#1}}
%</package>
% \end{macrocode}
% \end{macro}
%
% \begin{variable}{g_@@_struct_1_prop,g_@@_struct_kids_1_seq}
% The property list, the output handler and the kid sequence of the
% root structure (number 1). Besides the entries \texttt{Type} and
% \texttt{S} the property list gets the two internal entries
% \texttt{rolemap} and \texttt{parentrole}; both hold the two brace
% groups |{StructTreeRoot}{pdf}|.
% \begin{macrocode}
%<*package>
\@@_prop_new:c { g_@@_struct_1_prop }
\@@_new_output_prop_handler:n {1}
\@@_seq_new:c { g_@@_struct_kids_1_seq }
\@@_struct_prop_gput:nne
{ 1 }
{ Type }
{ \pdf_name_from_unicode_e:n {StructTreeRoot} }
\@@_struct_prop_gput:nne
{ 1 }
{ S }
{ \pdf_name_from_unicode_e:n {StructTreeRoot} }
\@@_struct_prop_gput:nne
{ 1 }
{ rolemap }
{ {StructTreeRoot}{pdf} }
\@@_struct_prop_gput:nne
{ 1 }
{ parentrole }
{ {StructTreeRoot}{pdf} }
% \end{macrocode}
% Namespaces are pdf 2.0, so the entry is only added if the
% PDF version is not smaller than 2.0.
% If the code moves into the kernel, the setting must be probably delayed.
% \begin{macrocode}
\pdf_version_compare:NnF < {2.0}
{
\@@_struct_prop_gput:nne
{ 1 }
{ Namespaces }
{ \pdf_object_ref:n { @@/tree/namespaces } }
}
%</package>
% \end{macrocode}
% In debug mode we have to copy the root manually as it is already set up.
% The \texttt{Namespaces} entry is removed from the debug copy.
% \begin{macrocode}
%<debug>\prop_new:c { g_@@_struct_debug_1_prop }
%<debug>\seq_new:c { g_@@_struct_debug_kids_1_seq }
%<debug>\prop_gset_eq:cc { g_@@_struct_debug_1_prop }{ g_@@_struct_1_prop }
%<debug>\prop_gremove:cn { g_@@_struct_debug_1_prop }{Namespaces}
% \end{macrocode}
% \end{variable}
%
% \subsection{Adding the /ID key}
% Every structure gets automatically an ID which is currently
% simply calculated from the structure number.
% \begin{macro}{\@@_struct_get_id:n}
% This expandable command takes a structure number and gives back the
% ID in parentheses: the text |ID.| followed by a number of zeros and
% the structure number. The number of zeros is the absolute value of the
% difference between |\g_@@_tree_id_pad_int| and the number of digits of
% the structure number.
% NOTE(review): because of \cs{int_abs:n} zeros are also added if the
% number has more digits than the pad value; confirm that this is intended.
% \begin{macrocode}
%<*package>
\cs_new:Npn \@@_struct_get_id:n #1 %#1=struct num
{
(
ID.
%% left padding with zeros
\prg_replicate:nn
{ \int_abs:n{\g_@@_tree_id_pad_int - \tl_count:e { \int_to_arabic:n { #1 } }} }
{ 0 }
\int_to_arabic:n { #1 }
)
}
% \end{macrocode}
% \end{macro}
%
% \subsection{Filling in the tag info}
% \begin{macro}{\@@_struct_set_tag_info:nnn }
% This adds or updates the tag info of a structure given by a number.
% The arguments are the structure number, the tag and the name space.
% The tag is stored as PDF name in the \texttt{S} entry.
% The definition depends on the PDF version: for versions smaller than 2.0
% the name space argument is ignored, otherwise the name space is looked up
% in |\g_@@_role_NS_prop| and, if found, the value is stored
% in the \texttt{NS} entry.
% \begin{macrocode}
\pdf_version_compare:NnTF < {2.0}
  {
    % PDF < 2.0: only the tag is stored
    \cs_new_protected:Npn \@@_struct_set_tag_info:nnn #1 #2 #3
      % #1 structure number, #2 tag, #3 NS
      {
        \@@_struct_prop_gput:nne { #1 } { S }
          { \pdf_name_from_unicode_e:n {#2} }
      }
  }
  {
    % PDF 2.0 or newer: store the tag and, if known, the name space
    \cs_new_protected:Npn \@@_struct_set_tag_info:nnn #1 #2 #3
      % #1 structure number, #2 tag, #3 NS
      {
        \@@_struct_prop_gput:nne { #1 } { S }
          { \pdf_name_from_unicode_e:n {#2} }
        \prop_get:NnNT \g_@@_role_NS_prop {#3} \l_@@_get_tmpc_tl
          {
            \@@_struct_prop_gput:nne { #1 } { NS }
              { \l_@@_get_tmpc_tl }
          }
      }
  }
\cs_generate_variant:Nn \@@_struct_set_tag_info:nnn {eVV}
% \end{macrocode}
% \end{macro}
%
% \begin{macro}{\@@_struct_get_parentrole:nNN}
% We also need a way to get the tag info needed for the parent child
% check from parent structures.
% The command takes a structure number and two token list variables.
% If the structure has a \texttt{parentrole} entry, its first brace
% group is stored in the first variable and its second brace group in the
% second variable. Without such an entry both variables are cleared.
% \begin{macrocode}
\cs_new_protected:Npn \@@_struct_get_parentrole:nNN #1 #2 #3
  % #1 struct num, #2 tlvar for tag, #3 tlvar for NS
  {
    \prop_get:cnNTF { g_@@_struct_#1_prop } { parentrole } \l_@@_get_tmpc_tl
      {
        % split the two brace groups of the stored value
        \tl_set:Ne #2 { \exp_last_unbraced:NV \use_i:nn  \l_@@_get_tmpc_tl }
        \tl_set:Ne #3 { \exp_last_unbraced:NV \use_ii:nn \l_@@_get_tmpc_tl }
      }
      {
        \tl_clear:N #2
        \tl_clear:N #3
      }
  }
\cs_generate_variant:Nn \@@_struct_get_parentrole:nNN {eNN}
% \end{macrocode}
% \end{macro}
% \subsection{Handlings kids}
% Commands to store the kids. Kids in a structure can be a reference to a mc-chunk,
% an object reference to another structure element, or a object reference to an
% annotation (through an OBJR object).
% \begin{macro}{\@@_struct_kid_mc_gput_right:nn,\@@_struct_kid_mc_gput_right:ne}
% The command to store an mc-chunk, this is a dictionary of type MCR.
% It would be possible to write out the content directly as unnamed object
% and to store only the object reference, but probably this would be slower,
% and the PDF is more readable like this.
% The code doesn't try to avoid the use of the /Pg key by checking page numbers.
% That imho only slows down without much gain.
% In generic mode the page break code will perhaps have to insert
% an additional mcid after an existing one. For this we use a property list.
%
% At first an auxiliary to write the MCID dict. This should normally be expanded!
% The page and the MCID are taken from the properties \texttt{tagabspage}
% and \texttt{tagmcid} of the label |mcid-|\meta{absnum}; both fall back to 1.
% \begin{macrocode}
\cs_new:Npn \@@_struct_mcid_dict:n #1 % #1 MCID absnum
  {
    <<
      /Type \c_space_tl /MCR \c_space_tl
      /Pg \c_space_tl
        \pdf_pageobject_ref:n
          { \property_ref:enn { mcid-#1 } { tagabspage } { 1 } }
      /MCID \c_space_tl
        \property_ref:enn { mcid-#1 } { tagmcid } { 1 }
    >>
  }
%</package>
% \end{macrocode}
% The command itself appends two items to the kid sequence of the structure:
% the expanded MCR dictionary, and an unexpanded lookup of the mc number
% in |\g_@@_struct_cont_mc_prop|
% (presumably so that content injected later by the page break code is
% found when the sequence is used; TODO confirm).
% \begin{macrocode}
%<*package|debug>
%<package>\cs_new_protected:Npn \@@_struct_kid_mc_gput_right:nn #1 #2
%<debug>\cs_set_protected:Npn \@@_struct_kid_mc_gput_right:nn #1 #2
%#1 structure num, #2 MCID absnum%
{
%% the MCR dictionary, expanded when it is stored
\@@_seq_gput_right:ce
{ g_@@_struct_kids_#1_seq }
{
\@@_struct_mcid_dict:n {#2}
}
%<debug> \seq_gput_right:cn
%<debug> { g_@@_struct_debug_kids_#1_seq }
%<debug> {
%<debug> MC~#2
%<debug> }
%% the lookup of additional content, stored unexpanded
\@@_seq_gput_right:cn
{ g_@@_struct_kids_#1_seq }
{
\prop_item:Nn \g_@@_struct_cont_mc_prop {#2}
}
}
%<package>\cs_generate_variant:Nn \@@_struct_kid_mc_gput_right:nn {ne}
% \end{macrocode}
% \end{macro}
% \begin{macro}
% {
% \@@_struct_kid_struct_gput_right:nn,\@@_struct_kid_struct_gput_right:ee
% }
% This command adds a structure as kid at the right of the kid sequence
% of the parent. We only need to record the object
% reference in the sequence. The debug variant additionally records the
% kid number in the debug kid sequence.
% \begin{macrocode}
%<package>\cs_new_protected:Npn\@@_struct_kid_struct_gput_right:nn #1 #2
%<debug>\cs_set_protected:Npn\@@_struct_kid_struct_gput_right:nn #1 #2
%%#1 num of parent struct, #2 kid struct
{
\@@_seq_gput_right:ce
{ g_@@_struct_kids_#1_seq }
{
\pdf_object_ref_indexed:nn { @@/struct }{ #2 }
}
%<debug> \seq_gput_right:cn
%<debug> { g_@@_struct_debug_kids_#1_seq }
%<debug> {
%<debug> Struct~#2
%<debug> }
}
%<package>\cs_generate_variant:Nn \@@_struct_kid_struct_gput_right:nn {ee}
% \end{macrocode}
% \end{macro}
% \begin{macro}
% {
% \@@_struct_kid_struct_gput_left:nn,\@@_struct_kid_struct_gput_left:ee
% }
% This command adds a structure as kid on the left, so as first
% kid. We only need to record the object reference in the sequence.
% The debug variant additionally records the
% kid number at the left of the debug kid sequence.
% \begin{macrocode}
%<package>\cs_new_protected:Npn\@@_struct_kid_struct_gput_left:nn #1 #2
%<debug>\cs_set_protected:Npn\@@_struct_kid_struct_gput_left:nn #1 #2
%%#1 num of parent struct, #2 kid struct
{
\@@_seq_gput_left:ce
{ g_@@_struct_kids_#1_seq }
{
\pdf_object_ref_indexed:nn { @@/struct }{ #2 }
}
%<debug> \seq_gput_left:cn
%<debug> { g_@@_struct_debug_kids_#1_seq }
%<debug> {
%<debug> Struct~#2
%<debug> }
}
%<package>\cs_generate_variant:Nn \@@_struct_kid_struct_gput_left:nn {ee}
% \end{macrocode}
% \end{macro}
%
% \begin{macro}
% {\@@_struct_kid_OBJR_gput_right:nnn,\@@_struct_kid_OBJR_gput_right:eee}
% Finally, the command to add an OBJR object. This has to write an object first.
% The first argument is the number of the parent structure, the second the
% (expanded) object reference of the annotation. The last argument is the page
% object reference.
%
% \begin{macrocode}
%<package>\cs_new_protected:Npn \@@_struct_kid_OBJR_gput_right:nnn #1 #2 #3
%<package>
%<package>
%<debug>\cs_set_protected:Npn \@@_struct_kid_OBJR_gput_right:nnn #1 #2 #3
  % #1: number of the parent structure
  % #2: object reference
  % #3: page object reference
  {
    % Write an unnamed dictionary object of type OBJR that points to the
    % object #2 and the page #3 ...
    \pdf_object_unnamed_write:nn { dict } { /Type/OBJR/Obj~#2/Pg~#3 }
    % ... and append the reference to that just written object to the
    % kids sequence of structure #1.
    \@@_seq_gput_right:ce { g_@@_struct_kids_#1_seq } { \pdf_object_ref_last: }
    % Debug build only: record a readable marker in the debug kids sequence.
%<debug>    \seq_gput_right:ce { g_@@_struct_debug_kids_#1_seq } { OBJR~reference }
  }
%</package|debug>
%<*package>
\cs_generate_variant:Nn \@@_struct_kid_OBJR_gput_right:nnn { eee }
% \end{macrocode}
% \end{macro}
% \begin{macro}
% {\@@_struct_exchange_kid_command:N, \@@_struct_exchange_kid_command:c}
% In luamode it can happen that a single kid in a structure is split at a page
% break into two or more mcid. In this case the lua code has to
% put the dictionary of the kid into an array. See issue 13 at tagpdf repo.
% We exchange the dummy command for the kids to mark this case.
% Change 2024-03-19: don't use a regex - that is slow.
% \begin{macrocode}
\cs_new_protected:Npn \@@_struct_exchange_kid_command:N #1 % #1: seq variable
  {
    % Take the leftmost item out of the sequence into the scratch tl.
    \seq_gpop_left:NN #1 \l_@@_tmpa_tl
    % Swap the first occurrence of the regular kid command for the
    % single-kid command; a plain token replacement, no regex.
    \tl_replace_once:Nnn \l_@@_tmpa_tl
      { \@@_mc_insert_mcid_kids:n }
      { \@@_mc_insert_mcid_single_kids:n }
    % Put the (possibly changed) item back at the left end.
    \seq_gput_left:NV #1 \l_@@_tmpa_tl
  }
\cs_generate_variant:Nn \@@_struct_exchange_kid_command:N { c }
% \end{macrocode}
% \end{macro}
% \begin{macro}{ \@@_struct_fill_kid_key:n }
% This command adds the kid info to the K entry. In lua mode the
% content contains commands which are expanded later. The argument is the structure
% number.
%
% \begin{macrocode}
\cs_new_protected:Npn \@@_struct_fill_kid_key:n #1 %#1 is the struct num
  {
    % Outside of lua mode: e-expand every kid into a scratch sequence,
    % drop the items that expanded to nothing and store the cleaned list
    % back as the kids sequence of structure #1.
    \bool_if:NF \g_@@_mode_lua_bool
      {
        \seq_clear:N \l_@@_tmpa_seq
        \seq_map_inline:cn { g_@@_struct_kids_#1_seq }
          { \seq_put_right:Ne \l_@@_tmpa_seq { ##1 } }
        %\seq_show:c { g_@@_struct_kids_#1_seq }
        %\seq_show:N \l_@@_tmpa_seq
        \seq_remove_all:Nn \l_@@_tmpa_seq {}
        %\seq_show:N \l_@@_tmpa_seq
        \seq_gset_eq:cN { g_@@_struct_kids_#1_seq } \l_@@_tmpa_seq
      }
    % The form of the K entry depends on the number of kids.
    \int_case:nnF
      { \seq_count:c { g_@@_struct_kids_#1_seq } }
      {
        { 0 }
          { } %no kids, do nothing
        { 1 } % 1 kid, insert
          {
            % in this case we need a special command in
            % luamode to get the array right. See issue #13
            \bool_if:NTF \g_@@_mode_lua_bool
              {
                \@@_struct_exchange_kid_command:c
                  { g_@@_struct_kids_#1_seq }
%    \end{macrocode}
% check if we get null
%    \begin{macrocode}
                % The module prefix is written as @@ here as everywhere
                % else in this function, so that the variable set and the
                % variable compared are visibly the same one.
                \tl_set:Ne \l_@@_tmpa_tl
                  { \use:e { \seq_item:cn { g_@@_struct_kids_#1_seq } { 1 } } }
                % Only write K if the kid is not equal to
                % \c_@@_struct_null_tl.
                \tl_if_eq:NNF \l_@@_tmpa_tl \c_@@_struct_null_tl
                  {
                    \@@_struct_prop_gput:nne {#1} {K}
                      { \seq_item:cn { g_@@_struct_kids_#1_seq } { 1 } }
                  }
              }
              {
                \@@_struct_prop_gput:nne {#1} {K}
                  { \seq_item:cn { g_@@_struct_kids_#1_seq } { 1 } }
              }
          } %
      }
      { %many kids, use an array
        \@@_struct_prop_gput:nne {#1} {K}
          {
            [
              \seq_use:cn { g_@@_struct_kids_#1_seq } { \c_space_tl }
            ]
          }
      }
  }
% \end{macrocode}
% \end{macro}
% \subsection{Output of the object}
% \begin{macro}{\@@_struct_get_dict_content:nN}
% This maps the dictionary content of a structure into a tl-var.
% Basically it does what |\pdfdict_use:n| does.
% This is used a lot so should be rather fast.
% \begin{macrocode}
\cs_new_protected:Npn \@@_struct_get_dict_content:nN #1 #2 %#1: structure num
  {
    % #2 is the tl variable that receives the result; start from empty.
    \tl_clear:N #2
    % Walk over all key/value pairs of the property list of structure #1.
    \prop_map_inline:cn { g_@@_struct_#1_prop }
      {
%    \end{macrocode}
% Some keys need the option to format the value, e.g. to add brackets for an
% array; we also need the option to ignore some entries in the properties.
%    \begin{macrocode}
        % If a formatting command named after the key exists it is called
        % with key, value and the tl variable; otherwise the pair is
        % appended as a plain /key value entry.
        \cs_if_exist_use:cTF { @@_struct_format_##1:nnN }
          { {##1} {##2} #2 }
          { \tl_put_right:Ne #2 { \c_space_tl/##1~##2 } }
      }
  }
% \end{macrocode}
% \end{macro}
%
% \begin{macro}{\@@_struct_format_rolemap:nnN,\@@_struct_format_parentrole:nnN}
% These two entries should not end up in the PDF.
% \begin{macrocode}
% Both formatters take key, value and tl variable and do nothing with
% them: the entry is swallowed and nothing is added to the tl variable.
\cs_new:Npn \@@_struct_format_rolemap:nnN #1#2#3 { }
\cs_new:Npn \@@_struct_format_parentrole:nnN #1#2#3 { }
% \end{macrocode}
% \end{macro}
% \begin{macro}{\@@_struct_format_Ref:nnN}
% Ref is an array, we store values as a clist of commands that must