%% Documentation for RexxXML
% The contents of this file are subject to the Mozilla Public License
% Version 1.1 (the "License"); you may not use this file except in
% compliance with the License. You may obtain a copy of the License at
% http://www.mozilla.org/MPL/
% Software distributed under the License is distributed on an "AS IS"
% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
% License for the specific language governing rights and limitations
% under the License.
% The Original Code is RexxXML.
% The Initial Developer of the Original Code is Patrick TJ McPhee.
% Portions created by Patrick McPhee are Copyright © 2003
% Patrick TJ McPhee. All Rights Reserved.
% Contributors:
% $Header: C:/ptjm/rexx/rexxxml/RCS/rexxxml.tex 1.21 2003/10/31 16:10:46 ptjm Rel $
\documentclass[twoside]{report}
% twoside option adds extra margin for binding, but this does not
% work well for on-line viewing. It ought to be handled by the dvi
% or pdf processor.
% Give odd and even pages the same side margin, derived from the two
% class defaults.
\advance\oddsidemargin by \evensidemargin
\advance\oddsidemargin by -1in
\divide\oddsidemargin by 2
\evensidemargin=\oddsidemargin
% the pages are too short -- the goal here is not to have a lot of blank
% paper
\advance\textheight by 5\baselineskip
\advance\textwidth by 1in
% Detect PDF output. Testing only whether \pdfoutput is defined is not
% sufficient: current TeX distributions run pdfTeX even when producing
% DVI, in which case \pdfoutput is defined but set to 0.
\newif\ifrexxxmlpdf
\ifx\pdfoutput\undefined
\else\ifx\pdfoutput\relax
\else\ifnum\pdfoutput>0
\rexxxmlpdftrue
\fi\fi\fi
% if run through pdftex, use ps fonts and turn on hyperlinking
\ifrexxxmlpdf
\usepackage[pdftex]{hyperref}
% uncomment for older hyperref (or update your hyperref!)
% \def\pdfBorderAttrs{/Border [0 0 0]}%
% allow a line break after an underscore in long identifiers
\def\_{\textunderscore\penalty\hyphenpenalty}
\usepackage[T1]{fontenc}
\usepackage{times}
\usepackage{mathptm}
% typeset verbatim material slightly smaller than the body text
\makeatletter
\def\verbatim@font{\normalfont\ttfamily\small}
\makeatother
% right arrow taken from the PostScript Symbol font (character 174).
% NOTE(review): this redefines LaTeX's \symbol command as a font
% selector -- confirm nothing later in the file relies on \symbol{...}.
\font\symbol=psyr
\def\rarrow{{\symbol\char174}}
\pdfinfo { /Title (RexxXML Usage and Reference)
/Author (Patrick TJ McPhee)
/Subject (RexxXML User Guide)
/Keywords (rexx,Regina,XML,XPath,XSLT)
/Version (1.0.0)
/Copyright (Copyright 2003, Patrick TJ McPhee)
}
% pdfborder is set here rather than as a package option, because
% spaces inside package options can be stripped by LaTeX.
\hypersetup{
pdfborder={0 0 0},
pdfstartview=FitH
}
\else
% DVI output: plain math arrow, and let hyperref choose its own driver
\def\rarrow{\ensuremath{\to}}
\usepackage{hyperref}
\fi
\lefthyphenmin=2
\righthyphenmin=3
\hyphenation{which-ever xml-Nodeset-Item at-tri-bute xml-Ver-sion xml-Add-Element
xml-Node-set-Item xml-Node-Ptr}
\usepackage{array}
\usepackage{longtable}
\usepackage{makeidx}
% no driver option: graphicx selects the driver matching the output
% format, so the DVI branch above keeps working
\usepackage{graphicx}
% for function names -- normal font, put on parens, with a tick
% of space between them to keep them from flowing together
\def\fn#1{#1(\,)}
% for XML tags -- normal font, with left and right angle brackets
% (which have a steeper incline than greater/less-than)
\def\tag#1{$\langle$#1$\rangle$}
\makeindex
% token register; not used in the part of the file visible here
\newtoks\rcmsg
\begin{document}
\pagestyle{empty}
\null\vfill
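\begin{center}
{\LARGE RexxXML Usage and Reference}

\vspace{18pt}
% NOTE(review): the e-mail address below was redacted when this file was
% extracted from a web page -- restore the real address.
{\large Patrick TJ McPhee ([email protected])

\vspace{9pt}
Version 1.0.0

\vspace{9pt}
31 October 2003

}
\vfill
\includegraphics[width={6cm}]{tree}
\end{center}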
\cleardoublepage
\pagenumbering{roman}
\pagestyle{headings}
\tableofcontents
\cleardoublepage
\pagenumbering{arabic}
\chapter{Introduction}
The RexxXML library provides a Rexx interface to data represented using HTML or
any XML dialect. The intent is to allow XML data to be processed in a
straight-forward manner within a Rexx program, and to allow Rexx to be
used from within certain XML-based applications.
Rexx is a programming language which was designed to be learned and used
easily by non-professional programmers. It is meant to make it easy to
write programs, at the expense of complicating the language
implementation. Its main characteristics are the absence of program
structure, isolation from machine limitations, integration with application
environments, and a set of built-in
functions which is useful for a wide array of data processing applications.
HTML is an SGML application which was designed for on-line
presentation of technical reports, and which is firmly entrenched as the
primary data representation on the world-wide web. XML is an SGML
application profile which was intended to allow greater freedom in marking
up web content, and which has gained some currency as a data exchange
protocol. It was specifically designed to simplify parsing (as compared
to the full generality of SGML) at the expense of complicating data
generation. Most document-generation-friendly features of SGML have been dropped,
even those which don't make parsing more complex.
In that sense, XML could be considered the anti-Rexx.
SGML\index{standards!ISO SGML} is an ISO standard language for defining document mark-up.
RexxXML's XML processing is provided by the Gnome project's libxml and
libxslt, both written by Daniel Veillard.
RexxXML does not attempt to provide a full interface to those
libraries, and it gives up some of their flexibility in exchange for
greater simplicity. Books and other reference material about those packages can
still be helpful in using RexxXML.
This guide is both an introduction to XML processing using RexxXML and a
complete reference to the library. The reader is not expected to be familiar
with Rexx or XML syntax~-- I include a brief introduction to both, as
well as the XML-related technologies, XPath and XSLT~-- but additional
reference materials are needed to write effective programs.
\section{Installation}
The RexxXML package includes pre-compiled binaries for Win32
% and OS/2
platforms
and source code which should compile on those platforms and most Unix systems.
It does not include libxml, libxslt, or a Rexx interpreter.
The distribution also does not include an installation program. The
general installation instructions are to copy the appropriate library
file to an appropriate directory. From this package, only rexxxml.dll
(or, on Unix, librexxxml.so) is required to use the library,
and applications using it can be distributed with only this file.
The documentation file, rexxxml.pdf, should also be distributed if
end users are expected to write macros using these functions.
More specific instructions for each
platform follow.
\subsection{Win32}
If you don't have a Rexx interpreter, I suggest obtaining
\href{http://regina-rexx.sf.net}{Regina
Rexx}\footnote{http://regina-rexx.sf.net}.\index{getting!Rexx}\index{Rexx!getting} There are
several other interpreters available for win32 platforms. See
the\index{Rexx!Language Association}
\href{http://www.rexxla.org}{Rexx Language Association}\footnote{http://www.rexxla.org}
web site for details. Note that the port of Regina which was included
with the Windows NT resource kit is not suitable for use with libraries
such as RexxXML.
You must obtain libxml and libxslt from the
\href{http://xmlsoft.org}{libxml}\footnote{http://xmlsoft.org}\index{getting!libxml}\index{libxml!getting}
and
\href{http://xmlsoft.org/XSLT}{libxslt}\footnote{http://xmlsoft.org/XSLT}\index{getting!libxslt}\index{libxslt!getting}
web pages. My recommendation is to get the pre-compiled windows binaries
which are available from a link on those pages. RexxXML may or may not work with
binaries obtained from other sources. If you want to, for instance,
add Rexx support to an application which uses a non-standard build of
libxml, you may need to rebuild either RexxXML or the application.
There are two pre-compiled libraries in the distribution file. If you
use Regina, the appropriate file is RexxXML/win32/rexxxml.dll. If you
use any other interpreter, the appropriate file is
RexxXML/rexxtrans/rexxxml.dll, and you need to obtain RexxTrans from
\href{http://rexxtrans.sf.net}{its web site}\footnote{http://rexxtrans.sf.net}.
To install the pre-compiled library, copy the appropriate version
of rexxxml.dll to either a directory in your program search path or the directory
containing the Rexx executable.
See section \ref{sec:compiling} for information about compiling the
library from source code.
\subsection{OS/2}
I apologise that the OS/2 port has not been completed as of the initial
release of the library. I couldn't find libxslt at Hobbes, and the libxml
there is fairly old. I'll release it once I track down or build
OS/2 ports for these libraries.\index{to do!OS/2 port}
%The pre-compiled library RexxXML/os2/rexxxml.dll will work with either
%Classic or Object Rexx, probably on any 32-bit version of OS/2. It
%requires the EMX run-time environment, libxml, and libxslt, all of which
%are available from the \href{ftp://hobbes.nmsu.edu}{Hobbes FTP site}\footnote{ftp://hobbes.nmsu.edu}.
%Copy rexxxml.dll to a directory in your LIBPATH, and it should be ready
%to go.
\subsection{Unix}
The distribution does not include a configuration script, but it includes
make files which have been known to work using the stock vendor compiler
on several Unix systems. If you have one of those systems, link the
appropriate make file to the name `Makefile' and build the `dist'
target. For instance, on Solaris:
\begin{verbatim}
ln Makefile.sun Makefile
make dist
\end{verbatim}
On most platforms, this builds a shared library called librexxxml.so.
On HP-UX, the file is called librexxxml.sl, and on AIX, it's called
librexxxml.a. The path to this library can be set in three ways:
Most Unix systems allow a shared library search path to be embedded
into program files. If you build Regina (or your Rexx-enabled
application) such that this path is set to include a directory
such as /opt/regina/lib or /usr/local/lib, you can install
RexxXML by copying the shared library to this directory (see section
\ref{sec:compiling} for more information). If this is not possible,
you need to either set an environment variable or change the way
the system searches for shared libraries.
Unix systems typically use a different path for shared libraries than
they do for program files. The name of the environment variable used
for the shared library path is not standardised, however most systems
use LD\_LIBRARY\_PATH. Notable exceptions are AIX (LIBPATH) and
HP-UX (SHLIB\_PATH for 32-bit executables, LD\_LIBRARY\_PATH for
64-bit executables). To install RexxXML, add an appropriate directory
to the shared library path and copy the shared
library to that directory.
Finally, some systems provide a utility (often called ldconfig) which
can be used either to set the standard search path for shared libraries,
or to provide a database of shared libraries. On such a system, RexxXML
can be installed by copying the shared library to an appropriate
directory and using this utility to add it to the search database.
You'll need to consult your system documentation for more information.
\subsection{Notes on compiling}
\label{sec:compiling}\index{compiling}I provide make files for the
stock vendor compilers on several Unix systems. On Windows, I provide
make files for Visual C++ (Makefile.nt) and the MinGW port of gcc
(Makefile.mingw).
%On OS/2, I provide a make file for the EMX port of gcc (Makefile.emx).
The Unix make files set platform-specific
variables and then load Makefile.inc, which contains the rules
for building the libraries. The win32
% and OS/2
make files contain all the
rules for building the library with their respective compilers.
I find it convenient to either link or copy the platform-specific
make file to the name Makefile.
The supplied make files expect libxml2 and libxslt to be installed
in the default location under /usr/local on Unix systems. You will
almost certainly have to edit the win32 make files to specify
the location of the include and library files.
Some parts of libxml and libxslt are optional (for instance,
schema support doesn't have to be compiled in). In this version
of RexxXML, compilation will fail if a required optional part of
libxml is not available. I will handle this better in a future
release. You may also have compile or link failures if you use
an older version of libxml or libxslt. The solution is to move
to the current versions of these libraries.
By default, the library is built with optimisation disabled and
debugging symbols included. This is convenient for library
development, however you'll get better performance
if you build the dist target (with the command `make dist').
To port the library to a new platform or to a new compiler on a
platform for which a make file exists, it should be sufficient to copy
an existing make file and change some of these variables. On Unix, to
change the compiler for an existing platform, it should be sufficient to
redefine PCFLAGS and POPT. If the new compiler is gcc, the values
can be taken from Makefile.bsd. The intent of each variable is indicated in
the table:
\goodbreak
\begin{longtable}{llp{9.5cm}}
\it Variable&\it Specific to&\it Purpose\\
\endhead
PDEBUG&Compiler&Flags in addition to -g required for creating programs
which can be examined in the debugger. This is relevant only if you
want to create a debug build;\\
POPT&Compiler&Flags which cause optimisation to be performed by the
compiler. At least -O should be used for all production code, in
my opinion;\\
PCFLAGS&Compiler&Compiler flags which should be set for both debug
and optimised compilation. This should include a flag for generating
relocatable (sometimes called position-independent) code;\\
PLDFLAGS&System&Flags for ld. This must include something
to cause ld to create a shared library, and a -L flag to give the
location of the libxml and libxslt libraries. On most platforms, it is
not necessary to link to the Rexx shared library, but you may
require special ld flags to ignore unresolved symbols;\\
PLIBS&System&Libraries required to resolve symbols used
in the library. This does not generally have to include the Rexx shared
library, since the Rexx interpreter will usually be running before
the library is loaded, but it must include -lxml2 and -lxslt, as
well as any libraries required by those libraries on your platform;\\
REXX\_INCLUDE&System&Compiler include flag to include rexxsaa.h
(defaults to -I/usr/local/include);\\
XML\_INCLUDE&System&Compiler include flag to include libxml/xmlversion.h,
libxslt/xslt.h, {\it et al}
(defaults to -I/usr/local/include/libxml2);\\
XML\_LIBDIR&System&Flags to cause ld to find libxml2.a and libxslt.a
(defaults to -L/usr/local/lib).\\
\end{longtable}
The win32 make files have a different set of make variables. Due to the
nature of the win32 development environment, the distinction between
platform-specific and compiler-specific values doesn't exist.
\begin{longtable}{lp{12cm}}
\it Variable&\it Purpose\\
\endhead
DEBUG&Flags required for creating programs
which can be examined in the debugger. This is relevant only if you want
to create a debug build;\\
POPT&Flags which cause optimisation to be performed by the
compiler;\\
PCFLAGS&Compiler flags which should be set for both debug
and optimised compilation. This may include flags for creating relocatable code;\\
PLDFLAGS&Flags for linking. This must include something
to cause ld to create a DLL. The NT make files use the
compiler to link;\\
REXX\_INCLUDE&Compiler flag to set the directory containing rexxsaa.h;\\
REXX\_LIB&Full path to the Rexx library;\\
XML\_INCLUDE&Compiler flag to set the directory containing the libxml and libxslt header files;\\
XML\_LIB&Either empty or a compiler flag to set the path to the
XML libraries;\\
PLIBS&Either a full path to the XML libraries or flags to include the
appropriate libraries, depending on the compiler.\\
\end{longtable}
\section{Reporting bugs}
I would like RexxXML to be useful and reliable. There will
always be room for improvement, and I appreciate hearing about problems.
If you do find a bug, an error in the documentation, or you simply have
a suggestion for improving the distribution, please send me details
at [email protected]. It's useful to know the operating system you're
using, the Rexx interpreter and its version, and the version of RexxXML,
and to have a set of steps for reproducing the bug. The example below
shows how to retrieve the interpreter and library version information:
\begin{verbatim}
/* report useful version information */
parse version ver
say 'Interpreter:' ver
call rxfuncadd 'xmlversion', 'rexxxml', 'xmlversion'
say 'RexxXML:' xmlversion()
\end{verbatim}
\section{Using RxFuncAdd}
RexxXML\label{sec:rxfuncadd} provides two entry points: xmlLoadFuncs\index{xmlLoadFuncs}
and xmlVersion\index{xmlVersion}.
xmlLoadFuncs must be loaded
using RxFuncAdd\index{RxFuncAdd}, and then invoked to register the rest
of the functions with the Rexx interpreter and initialise libxml and
libxslt.
RxFuncAdd takes three arguments~-- the name of the function as it will
be used in the Rexx program, the name of the library from which to load
the function, and the name of the function as it appears in the library.
RxFuncAdd returns 0 on success, or 1 on failure. Regina has a function
called RxFuncErrMsg\index{RxFuncErrMsg} which can give useful information about the reason
for a load failure. A few common reasons for failure are:

\index{RxFuncAdd!reasons for failure}Re-registration: RxFuncAdd will fail if
the Rexx function name (the first argument) duplicates a previously-registered
function. This sometimes happens with IBM's interpreter because functions
remain registered after a program finishes running, unless they are explicitly
dropped. You can test for this condition using RxFuncQuery;

Path issues: the library is called rexxxml.dll on Win32
% and OS/2
platforms,
librexxxml.a on AIX, librexxxml.sl on HP-UX, and librexxxml.so on
other Unix platforms. On Win32, this file needs to be in the path, or
in the directory containing the Rexx interpreter. On
%OS/2 and
AIX, it needs
to be in a directory listed in LIBPATH. On most other Unix systems, it
needs to be in a directory listed in LD\_LIBRARY\_PATH. Some systems
have an ldconfig utility which allows you to forego setting any environment
variables;

Case sensitivity: on some platforms, with some Rexx interpreters,
the case of the last two arguments to RxFuncAdd must match the
case of the library name as it appears in the filesystem and
the case of the function name as it appears in the library.
Try loading `xmlloadfuncs' rather than `xmlLoadFuncs',
and `rexxxml' rather than `RexxXML'. Regina allows you to omit the `lib'
prefix and any suffix. Other interpreters may require the full file name
to be included in the second argument;

Windows 95: early releases of Windows 95 did not include msvcrt.dll, the
C run-time library used by RexxXML. This library is sometimes installed
with applications software. It can also be obtained through service packs,
or from the Microsoft web site;

%OS/2: the pre-built version of RexxXML requires the EMX run-time
%environment. This can be obtained from the Hobbes archive;
libxml, libxslt: you must have libxml and libxslt installed in the
appropriate locations. These are available through the
\href{http://xmlsoft.org}{libxml} web page;

Rexx.exe: Regina includes two executables, one called `rexx', and
the other called `regina'. The difference is that `rexx' includes the Rexx
interpreter as part of the executable, while `regina' loads the interpreter
from a shared library. RxFuncAdd works only with the `regina' version of
the interpreter (the `rexx' version is slightly faster, though). Other
interpreters typically have one executable called rexx.exe, which works
as you'd expect.
\section{Licensing}
RexxXML is distributed free of charge in the hopes that it will be
useful, but without any warranty. This version is distributed under the terms of the Mozilla
Public License. The precise details of the licence are found in the file
MPL-1.1.txt in the distribution.
If you use the library purely as distributed by me, then you can
cheerfully ignore the licensing. If you modify the source code or
adopt portions of it in your own programs or libraries, you should be
aware of and fulfil your obligations under the licence.
Although there are no obligations or restrictions related to use of the
library, I would prefer that you do not use RexxXML in applications
which cause injury or hardship to others. Also, if you derive a
significant monetary benefit from the use of RexxXML, please share a
portion with someone less fortunate. I'm always pleased to receive
good wishes, but Daniel Veillard and his contributors are responsible
for the bulk of the work.
\chapter{The Rexx Language}
Rexx was designed in the late 1970s by
\href{http://www.rexx.hursley.ibm.com}{Mike Cowlishaw}\footnote{http://www.rexx.hursley.ibm.com}
of IBM. His goal was to
create a language which would be simple enough for use by people who are not
professional programmers, and which can be integrated into operating systems and
application programs. This section gives an overview of the language which
should make it possible for readers unfamiliar with Rexx to understand the
examples later in the guide. There are several on-line introductions to the language (see the
\href{http://www.rexxla.org/Links/links.html}{Rexx Language Association}
web\index{Rexx!Language Association}
site for some links), and most interpreters include the reference information
needed to write actual programs. Cowlishaw's book {\it The Rexx Language} is
also a worth-while resource.
There are a few dialects of Rexx. Object Rexx is an object-oriented extension
which IBM introduced in the mid-1990s. Roo is a simpler object-oriented
extension by \href{http://www.kilowattsoftware.com/rooPage.htm}{Kilowatt
Software}\footnote{http://www.kilowattsoftware.com/rooPage.htm}. NetRexx is a Java-based
dialect written by Mike Cowlishaw around the time Java was introduced. The
remaining dialects are loosely grouped together as `classic' Rexx, and differ
primarily in the built-in functions they provide. There is an ANSI standard\index{standards!ANSI Rexx}
for classic Rexx, and the general trend among implementations is to support
the functions and behaviour specified by ANSI. This section is an introduction
to classic Rexx.
\section{Overview}
A Rexx program consists of a sequence of statements, usually contained in a
single source file. There's no required formal structure~-- execution begins
with the first statement encountered, and continues until either there are no
more statements or the program exits explicitly. Variables do not have to be
declared, and do not have data types\index{data types!Rexx}~-- any variable can be used to hold any
kind of data. There are no reserved words, and variable names are not
case-sensitive.
Arithmetic is not limited by the machinery in use~-- calculations use
the precision specified by the programmer. These features reduce the work
needed to write simple programs.
At the same time Rexx has some features to help with writing longer
applications. The language supports sub-routines, which may be contained in
external files, and it has standard mechanisms for passing commands to a
controlling application and executing functions written in other languages. An
exception mechanism can trap the use of variables which have not been assigned
values, and there's support for rudimentary debugging built in to the
language.
Rexx is an interpreted language, which means the language instructions
are executed directly, rather than being translated into the instruction
set of the computer running the program. A program can generate a
statement and then call on the interpreter to execute it, which allows Rexx
programs to be self-modifying to some extent. Most language interpreters
convert the text representation of a program into a form which can be
easily and quickly executed. Rexx interpreters often allow this
intermediate form to be saved and restored, which can reduce the
start-up time for large programs, and which provides some protection
against program modification by end-users.
\section{Comments}
\index{comments!Rexx}Comments are parts of a program which can be used to provide
commentary directed at people reading the code. Often, comments will
explain what the program does and how
it does it, why a particular algorithm was used, or clarify obscure
code. They can also record information such as the name of the author,
current revision number, or copyright.
In Rexx, comments begin with \verb+/*+ and end with \verb+*/+. Some
interpreters require that the first thing in every program be a comment,
and it's a good idea to at least record what the program does unless
you're planning to delete it as soon as it finishes running. There's
no limit on how long a comment can be, and they can occupy any number
of lines. The interpreter throws comments away as they are read, so
they have no effect on the program, except that a comment between
two symbols separates the symbols.
Comments nest, meaning each occurrence of \verb+/*+ within a comment
must have a corresponding \verb+*/+. The benefit of this is you can
easily comment out blocks of code. Apart from this, comments have no
syntactic requirements.
\begin{verbatim}
/* Program to demonstrate comments
Patrick TJ McPhee, 2003/07/07 13.33.57 */
a = 3 /* a comment can go at the end of the line */
/* or at the beginning (but don't!) */ b = 4
c = a/* or in the middle */b
d = a/**/b * 2
e = (a/**/b) * 2
/* at this point, a = 3, b = 4, c = 34, d = 38, and e = 68
/* this is a nested comment */
*/
\end{verbatim}
\section{Statements}
Anything that's not a comment is either an instruction to the Rexx
interpreter, a command to be executed by the controlling application, a
variable assignment, a label, a blank line, or a syntax error.
I'll distinguish between instructions, commands, and assignments,
and collectively refer to them as
`statements'.\index{statement}\index{instruction}\index{command}
Rexx instructions always begin with one of the keywords address, arg, call,
do, drop, else, end, exit, if, iterate, interpret, leave, nop, numeric,
options, parse, procedure, pull, push, queue, return, say, select, signal, then, or
trace, followed by other tokens, whose meanings depend on the keyword. Labels
consist of a symbol followed by a colon. Variable assignments consist of
a variable name, an equals sign, and an expression.
Any other non-blank lines must
consist of an expression whose value is passed to the controlling application.
\begin{verbatim}
'cp -p file1 file2' /* string is evaluated and treated as a command */
a /* A is evaluated and treated as a command */
a = 3 /* 3 is evaluated and assigned to A */
drop a /* the drop instruction -- A is unassigned */
a: /* the label A */
\end{verbatim}
Statements are terminated by either a semi-colon or the end of the
line, whichever comes first, although they can be extended over more
than one line by placing a comma at the end of the line. This can lead
to confusion in some situations, so you should be careful to make such
a comma stand out.
Normally, instructions and assignments are written literally, while
a command is an expression which evaluates to
the command text. Occasionally, it's useful to create a Rexx instruction
dynamically, for instance, to allow a subroutine name to be supplied at
run-time. This can be done through the `interpret'
instruction\index{interpret}. The syntax is \verb_interpret_ {\it expression},
where {\it expression} evaluates to one or more valid Rexx statements. When using
literal strings in {\it expression}, one must be
careful to use enough quotation marks around strings that should appear in
the final statement.
\begin{figure}[htb]
\begin{verbatim}
/* suppose fnname = 'myfn' and arg2 = 10. This statement is
* equivalent to var = myfn("arg1, which is a string", 10) */
interpret 'var =' fnname'("arg1, which is a string",' arg2')'
\end{verbatim}
\removelastskip
\end{figure}
Note that to assign a value to a variable whose name is determined
dynamically, you can
use the \fn{value} function, as discussed in section \ref{sec:assignment}.
This is likely to be more efficient and easier to read than `interpret'.
Rexx programs normally execute until they run out of statements, have
a fatal error, or encounter an `exit' instruction\index{exit}. The syntax is
\verb+exit+ {\it value}, where {\it value} is a numeric return code for the
program. Most operating systems expect this to be 0 for success or a non-zero
value for failure.
\section{Variables, constants, and expressions}
\subsection{Symbols}
Rexx expressions are
made up of symbols, literal strings, and operators.
A symbol\index{symbol!definition}\index{names!Rexx} is a combination of letters
(a--z)\index{letter!definition}, digits (0--9)\index{digit!definition},
question and exclamation marks (?!), underscores (\textunderscore) and periods
(.). Some other characters ({\it e.g.}, @, \$, and \#) can be used in
symbols with some interpreters. This is legal according to the standard,
but applications which take advantage of it will not be portable to other
interpreters\index{standards!gotta love them}.
Symbols are used for various purposes throughout Rexx programs.
Variable names, keywords, labels, and numbers are all types of symbol.
When the Rexx interpreter encounters a symbol in a program, it first converts all
the letters to upper-case. This means two symbols which differ only in case,
for example \verb+if+ and \verb+If+, are treated precisely the same way
by the Rexx interpreter: Rexx symbols are case-insensitive.
\subsection{Variables}
Variables are mechanisms for storing values for later use. When we give
a variable a value, we say we are assigning a value to the variable, and
when we use the value, we say we are evaluating the variable, or that the
variable evaluates to some value.
In Rexx, a
variable's name can be any symbol, except for those that start with a digit or period.
Apart from that, there are no restrictions on the names which can be
used. It's legal to use a Rexx keyword (such as `options' or `signal') as a variable
name. There are certain variable names which you should avoid
using, though, since they are sometimes assigned values by the Rexx
interpreter: `rc', `result', and `sigl'.
Uninitialised variables\index{variable!uninitialised} evaluate to the variable's name, with all the
letters converted to upper-case (unless the novalue condition is in
effect~-- see section \ref{sec:conditions}). A variable can be assigned a value, then
returned to pristine condition using the `drop' instruction\index{drop}:
\begin{verbatim}
drop variable
\end{verbatim}
Variables with periods in their names are called compound
variables.\index{variable!compound}
The symbol up to and including the first dot is called the stem\index{stem}, and the
rest is called the tail\index{tail}. A value assigned to a stem acts as a default
for all compound variables based on that stem, and dropping the
stem causes all the compound variables based on that stem to be dropped.
We frequently refer to all the compound variables based on a single stem
as `a stem variable'.\index{variable!stem}
When a compound variable is evaluated, each
dot-delimited component of the tail is evaluated, then the tail is
appended to the stem, and the whole compound is evaluated. If \verb$i$
is equal to 3, \verb$x.i$ and \verb$x.3$ are the same variable.
A tail can contain any data: it can be numeric, a string, binary
data, or anything you like.
In many languages, a record\index{record structures} is a set of related
information, stored in the same variable. The data is accessible through a
fixed set of named components, called `fields'. Stems can simulate records by
using tails as fields. \verb$X.author$ could hold the name of a book's author,
for instance, while \verb$X.title$ holds its title. You need to be careful in
this case that you don't use `author' and `title' as variables, since that
would lead to bugs or at least make it less convenient to use the record
structure. One convention is to precede symbols which are being used as fields
with !, 0, or \textunderscore, and never start variable names with ! or
\textunderscore.
Arrays\index{arrays} are also sets of information, however instead of having a
fixed set of fields, they form associations between one set of data,
the elements, and another, the indices. Array indices are commonly
restricted to be integers within some range (for instance, 1 to $n$,
where $n$ is the declared size of the array).
Rexx compound variables are like arrays which can have any values for
indices. They can
simulate a numeric array by using the numeric index convention.
The numeric convention is for the 0 element to contain the number of array
elements $n$, while elements 1 to $n$ contain the data.\index{numeric
index convention}
A compound variable can simulate a multi-dimensional array by separating
the index for each dimension with a dot.\index{arrays}
\begin{figure}[htbp]
\begin{verbatim}
/* print all the elements in an `array'. x.0 and each x.i.0 must
* have been set when the array was assigned its values */
do i = 1 to x.0
do j = 1 to x.i.0
say x.i.j
end
end
\end{verbatim}
\removelastskip
\end{figure}
One significant difference between stems and arrays or records in other
languages is that a stem is not a single variable. You cannot assign one
stem to another or pass it to a function, although you can pass the name of a stem
to a function.
\index{variable!compound}\index{variable!stem}
\subsection{Assignment}
\label{sec:assignment}Variables can be set using an assignment statement, the `value' function,
the `parse' instruction, loop iteration, and by functions such as the
ones in the RexxXML package. Here are a few examples:
\begin{verbatim}
var = value /* evaluates `value' and assigns the result to var */
call value 'var', value /* does the same thing -- here, value is
both a function name and a variable
used as a function argument */
parse var value var /* does the same thing -- here, var is
both a keyword and the name of a
variable used in a template */
\end{verbatim}
An assignment statement is a variable name, an equals sign (=), and an
expression (see sections \ref{sec:arithmetic}, \ref{sec:strings}, and \ref{sec:conditionals}).
It evaluates the expression and assigns the result to the variable.
The parse\index{parse}\label{sec:parse} instruction is probably the most complicated thing in Rexx and
a full treatment goes well beyond my goals for this overview.
The most general syntax is \verb+parse+ {\it something} {\it template}.
It
evaluates {\it something}, then breaks the result into fields and assigns them
to variables according to {\it template}. Examples in this guide will
use \verb+parse var+ {\it variable} {\it template}, which evaluates a variable,
\verb+parse value+ {\it expression} \verb+with+ {\it template}, which evaluates an arbitrary
expression, and \verb+parse arg+ {\it template} \verb+,+ {\it template} \dots, which
evaluates an argument list. In this case, {\it template} is repeated for each
expected argument of a subroutine.
Parse templates can be confusing, however in this guide only two simple
types are used: the list of variables, and the list of variables
separated by literal strings. When the template is a list of variables,
the value being parsed is broken up at spaces, and each field is assigned
to the corresponding variable from the list.
When variable names are separated by a literal string, the string value is
used as a delimiter instead.
Periods (.) can be used instead of variable names in any parse template,
in which case the corresponding field is discarded rather than being
assigned to a variable.
If there are left-over variables after all the fields have been assigned,
they are assigned the zero-length string. If there are not enough variables
to hold all the fields, the last variable is assigned all the left-over fields and
delimiters. It's common to use this feature to extract fields from a variable, one
field at a time.
\begin{verbatim}
parse value 'go to the store' with g t th s
/* g = 'go'; t = 'to'; th = 'the'; s = 'store' */
parse value 'alpha,bravo,charlie,delta' with a ',' b ',' . ',' d
/* a = 'alpha'; b = 'bravo'; d = 'delta' */
list = 'one two three'
parse var list car list
/* car = 'one'; list = 'two three' */
\end{verbatim}
The \fn{value} function either retrieves or sets the value of a specified
variable. Its first argument is an expression which evaluates
to the variable name, while the second is the value to assign to the
variable. The variable can be a Rexx variable or a variable in some
controlling environment, in which case the optional third argument defines
the environment in which
the variable is found. As I mentioned earlier, the \fn{value} function can be
used instead of the interpret instruction in cases where the variable
name to be updated is itself generated dynamically. I'll mention later
that \fn{value} can be used in a subroutine to access the contents of
a stem variable whose name has been passed as an argument.
\begin{verbatim}
call value var,val /* set value of a variable whose name
is stored in the variable `var' */
val = value(arg(2)'.'i) /* val = x.i, where x is the second
argument */
\end{verbatim}
Loop iteration and the RexxXML functions are discussed later.
\subsection{Constants}
Data in Rexx is treated as strings of characters. The strings can be read
from an external file or queue, returned by a function, or generated from constant
values stored in the Rexx program.
There are three kinds of constants: numeric symbols, strings, and
non-numeric symbols.
Numeric symbols\index{number!definition} are simply real numbers as they are usually expressed in
English-speaking countries. They consist of digits with an optional
sign and an optional
period representing the decimal point, followed by an optional exponent. The
exponent is e or E followed by an optional sign followed by digits, and
it means to multiply the rest of the number by 10 raised to the power
of the number to the right of the E.
Constant strings\index{string!definition} are sequences of characters delimited by either
\verb+'+ or \verb+"+. The string delimiter can be represented by
doubling (\verb+'it''s'+) or by using the other delimiter
(\verb+"it's"+). A string may be represented in hexadecimal by
appending `x' to it, or in base-2 by appending `b' to it.
\begin{verbatim}
x = .232 /* assign a number to x */
x = '.232' /* exactly the same */
x = '2e 32 33 32'x /* exactly the same, hexadecimal */
x = '2e323332'x /* exactly the same */
x = '00101110001100100011001100110010'b /* but why? */
\end{verbatim}
A symbol\index{symbol!constant} which is not a number ({\it e.g.}, it contains a letter other
than e) but which is not a valid variable (starts with period or a digit)
is a constant whose value is itself upper-cased. This can be useful for
creating field names in a record represented by a compound variable\index{record structures}, however you should
know that ANSI\index{standards!ANSI Rexx} Rexx has reserved all symbols starting with period for
future `special' variable names.
\subsection{Arithmetic}
\label{sec:arithmetic}
Rexx provides arbitrary-precision arithmetic\index{arithmetic!arbitrary precision}. What this means is that
you can perform calculations to however many digits of accuracy you want,
within the limits of sanity, keeping in mind that some interpreters may have a
restrictive
definition of sanity. The default precision is 9, which is often too low.
Keeping in mind the time cost of calculations increases with the number
of digits of accuracy, I suggest\index{arithmetic!precision}\index{numeric digits}
using 20 digits for most purposes:
\begin{verbatim}
numeric digits 20
\end{verbatim}
Precedence\index{precedence!definition} is a weighting assigned to operators which determines which
operation is performed first when two operators appear in a row. Rexx
evaluates expressions in order of precedence, and then left to right.
The standard arithmetic operations are supported with the precedence set so
that they work the way you
might expect. Parentheses can be used to ensure the correct grouping of
operations. $3+2*5$ is 13, but $(3+2)*5$ is 25. The operators and their
precedences are:\index{arithmetic!precedence of operators}\index{precedence!arithmetic operators}
\begin{longtable}{llp{7.7cm}}
\it Operator&\it Precedence&\it Effect\\
\endhead
$+$&8&(as prefix) multiplies the next term by $1$\\
$-$&8&(as prefix) multiplies the next term by $-1$\\
$**$&7&raises the term to the left to the power of the integer term to the
right\\
$*$&6&multiplies the term to the left by the term to the right\\
$/$&6&divides the term to the left by the term to the right\\
$\%$&6&divides the term to the left by the term to the right and strips off
the non-integer portion\\
$//$&6&divides the term to the left by the term to the right and returns
the integer remainder\\
$+$&5&adds the term to the left to the term to the right\\
$-$&5&subtracts the term to the right from the term to the left\\
\end{longtable}
Cowlishaw's book contains a complete description of how calculations
are performed. I will say that the highest precedence\index{arithmetic!precedence of operators}\index{precedence!arithmetic operators}
operators are applied first, and equal precedence operations are applied left-to-right.
Note that $-x\mathbin{**}2$ is equivalent to $(-x)\mathbin{**}2$ or $x^2$, and not $-x^2$ as
you might expect.
Rexx does not include very many arithmetic functions, and in particular there
are no transcendental functions\index{arithmetic!transcendental functions}.
Some interpreters include these functions
as a loadable library, however they generally do not provide arbitrary
precision. There are also math libraries written in Rexx available on
the Internet, although there is currently no Rexx archive, so finding them
can be a challenge. I provide
a `mathematical bumper pack' from
\href{http://www.interlog.com/~ptjm}{my web
page}\footnote{http://www.interlog.com/\textasciitilde ptjm}. This
includes two math libraries written in C and an old version of
John Brock's RxxMath library, which is written
in Rexx and provides arbitrary precision.
The standard language does provide functions for converting between bases 2,
10, and 16 (\fn{b2x}, \fn{d2x}, \fn{x2b}, \fn{x2d}), for converting between characters and
their character codes, expressed in bases 10 and 16 (\fn{c2d}, \fn{c2x},
\fn{d2c}, \fn{x2c}), for determining absolute value (\fn{abs}), maxima and minima
(\fn{max}, \fn{min}), decimal place conversion (\fn{format}, \fn{trunc}), and the
sign of a number (\fn{sign}), as well as a pseudo-random number generator
(\fn{random}).
\subsection{String manipulation}
\label{sec:strings}
Rexx has three ways to concatenate strings.\index{string!concatenating}
Placing two expressions next to each other, separated by a space,
concatenates their values, with a space between them.
Placing two expressions next to each other, without so much as a space
between them, concatenates their values without a space between them.
This is only possible for certain kinds of expressions,
for instance a string followed by a variable other than `x' or `b'.
Finally, there's
the concatenation operator $\|$, which has the same effect as abutment, but
doesn't require the strings to be abutted.
Technically, you can abut any two expressions by placing a comment between them,
but it's simpler and clearer to use the concatenation operator.
Compared to the operators in the tables in sections \ref{sec:arithmetic}
and \ref{sec:conditionals}, the concatenation operators have precedence
4.\index{precedence!string operators}
\begin{verbatim}
x = 'nice'
y = 'day'
z1 = x || y /* z1 = 'niceday' */
z2 = x y /* z2 = 'nice day' */
z3 = x/**/y /* z3 = 'niceday' */
z4 = x''y /* z4 = 'niceday' */
\end{verbatim}
You can parse data from strings using the `parse' instruction\index{parse} (section
\ref{sec:parse}) and compare strings as described in section
\ref{sec:conditionals}, but all other string operations are performed
using built-in or third-party functions. Hopefully the use of functions
in the examples will be self-explanatory, but here are a few examples of
functions I'm likely to use:
\begin{verbatim}
z1 = strip(' exceptional ') /* z1 = 'exceptional' */
z2 = substr('demanding', 3, 2) /* z2 = 'ma' */
z3 = substr('demanding', 3) /* z3 = 'manding' */
z4 = insert('r', 'mable', 2) /* z4 = 'marble' */
z5 = changestr('a', 'wat', 'i') /* z5 = 'wit' */
z6 = word('go by brooks', 2) /* z6 = 'by' */
z7 = words('what passing bells') /* z7 = 3 */
z8 = wordpos('b', 'a b c') /* z8 = 2 */
z9 = abbrev('water', 'wat') /* z9 = 1 */
z10 = abbrev('water', 'wit') /* z10 = 0 */
z11 = compare('water', 'wat') /* z11 = 4 */
z12 = compare('water', 'water') /* z12 = 0 */
z13 = pos('t', 'water') /* z13 = 3 */
z14 = translate('water') /* z14 = 'WATER' */
z15 = translate('horse', 'wrtae', 'heros') /* z15 = 'water' */
z16 = translate('horse', 'ndrik', 'shore') /* z16 = 'drink' */
\end{verbatim}
A table of the built-in functions in the ANSI standard appears in section
\ref{sec:builtins}.
\section{Subroutines}
\label{sec:subroutines}
A subroutine is a named collection of code which can be executed repeatedly.
It may return a value, in which case it's called a function, or it may simply