1\documentclass[a4paper]{report}
2%\usepackage[margin=2cm]{geometry}
3\usepackage[bookmarks]{hyperref}
4%\usepackage[australian]{babel}
5\usepackage[utf8]{inputenc}
6\usepackage[T1]{fontenc} % needed for italic curly braces
7\usepackage{xspace}
8%\usepackage{changebar}
9
10\newcommand{\code}[1]{\textnormal{\texttt{#1}}}
11
12\newcommand{\cogent}{Cogent\xspace}
13\newcommand{\Cogent}{\cogent\xspace}
14
15\newcommand{\TODO}[1]{\textbf{\textsl{TODO: #1}}}
16\newcommand{\todo}[1]{\TODO{#1}}
17
18\newlength{\prodindlen}
19\setlength{\prodindlen}{\textwidth}
20\addtolength{\prodindlen}{-2em}
21
22\newcommand{\gramprod}[2]{
23  \parbox{\textwidth}{
24  \setlength{\parindent}{2em}
25  \it\noindent #1:
26  
27  #2
28  \vskip 2ex
29  }}
30
31\newcommand{\indprod}[2]{
32  \parbox{\prodindlen}{
33  \setlength{\parindent}{2em}
34  \it\noindent #1:
35  
36  #2
37  \vskip 2ex
38  }}
39
40\newcommand{\gramselprod}[2]{
41  \parbox{\textwidth}{
42  \setlength{\parindent}{2em}
43  \it\noindent #1: one of
44  
45  #2
46  \vskip 2ex
47  }}
48
49\newcommand{\indselprod}[2]{
50  \parbox{\prodindlen}{
51  \setlength{\parindent}{2em}
52  \it\noindent #1: one of
53  
54  #2
55  \vskip 2ex
56  }}
57
58\newcommand{\graminfprod}[2]{
59  \parbox{\textwidth}{
60  \setlength{\parindent}{2em}
61  {\it\noindent #1: informal}
62  
63  #2
64  \vskip 2ex
65  }}
66
67\newcommand{\indinfprod}[2]{
68  \parbox{\prodindlen}{
69  \setlength{\parindent}{2em}
70  {\it\noindent #1: informal}
71  
72  #2
73  \vskip 2ex
74  }}
75
76
77\begin{document}
78
79\title{A \cogent Manual}
80\author{Gunnar Teege\\ gunnar.teege{@}unibw.de}
81\date{Version 0.9.1}
82
83\maketitle
84
85I wrote this manual when I had my first contact with \cogent and wanted to get a clear view of its syntax and its concepts.
86For me the best way to do so is by writing it down in an organized way. Since I found that there was no similar documentation,
87I decided to do so in a form that it may be usable (and hopefully useful) for others as well.
88
89Zilin Chen has read the manual very carefully and has pointed me to many issues, so I was able to correct and improve it 
90extensively. Many thanks to him for his commitment.
91
92This manual is not intended as a tutorial for programming in \cogent or to prove properties of \cogent programs. The examples
93are not chosen to be realistic, they are only used for illustrating the syntax. The manual only describes the \cogent
94``surface syntax'' which is the interface for the programmer. Note that most publications about \cogent refer to the more
95concise ``core syntax'' which is created from the surface syntax by applying the ``desugaring rules''.
96
97\chapter{Lexical Syntax}
98
99
100The basic lexical items in \cogent are comments (including document blocks and pragmas), identifiers (including reserved words), symbols and literals.
101Additionally in a \cogent program the usual Haskell preprocessor directives can be used, which are similar to the C preprocessor directives.
102
103
104\section{Comments}
105
106Comments have the same form as in Haskell. A comment may either be a line comment
107starting with the symbol \code{-}\code{-} and ending at the end of the line, or it may
108be a block comment enclosed in \code{\{-} and \code{-\}}.
109
110Examples of comments are:
111\begin{verbatim}
112  f: U8 -> U8 -- this is a line comment after a function signature
113  f x {- x is the function argument -} = x+1
114    -- function f returns its incremented argument
115\end{verbatim}
116
117Block comments may occur in a \cogent source between all other lexical entities.  Block comments can be nested,
118the closing brace of the inner comment does not end the outer comment:
119\begin{verbatim}
120  {- This is a comment with a nested {- inner comment. -} 
121     After it the outer comment continues. -} 
122\end{verbatim}
123
124
125
126A special form of comments are document blocks. They have a similar form like line comments but start 
127with the symbol \code{@}. Document blocks can be used to generate an HTML documentation from a Cogent source:
128\begin{verbatim}
129  @@ # Heading
130  @@ This is a standalone documentation block
131
132  @ Documentation for the following function
133  f: U8 -> U8
134  f x = x+1
135\end{verbatim}
136
137Another special form of comments are pragmas, they have the form
138\begin{verbatim}
139  {-# ... #-}
140\end{verbatim}
141Pragmas are used to optimise Cogent programs and to interface external C components. The details
142of pragmas are not (yet?) covered by this manual.
143
144
145\section{Identifiers}
146
147Identifiers are used to name items in a program. As usual in programming languages, they consist of
148a sequence of letters and digits beginning with a letter.
149
150\cogent syntactically distinguishes between lowercase identifiers and capitalized identifiers. 
151
152\vspace{2ex}
153\indinfprod{LowercaseID}{
154   A sequence of letters, digits, underscore symbols, and single quotes
155  
156  starting with a lowercase letter
157}
158
159\indinfprod{CapitalizedID}{
160   A sequence of letters, digits, underscore symbols, and single quotes
161  
162  starting with an uppercase letter
163}
164
165
166The underscore symbol \code{\_} and the single quote \verb|'| may appear in identifiers but not at the beginning. Examples for valid 
167identifiers are \code{v1}, \code{very\_long\_identifier}, \code{CamelCase}, \code{T}, \code{W\_}, and \code{v}\verb|'|.
168
169
170Lowercase identifiers are used for record field names and for term variables and type variables. Capitalized identifiers are
171used for type constructors and data constructors.
172
173There are some \textit{reserved words} in \cogent wich syntactically are identifiers but cannot be used as identifiers.
174The reserved words are in alphabetical order:
175\begin{verbatim}
176  all and complement else False if in include let not put
177  take then True type
178\end{verbatim}
179
180\section{Literals}
181
182There are four kinds of literals in \cogent.
183
184\subsection{Boolean Literals}
185
186The boolean literals are the reserved words \code{True} and \code{False}.
187
188\vspace{2ex}
189\indselprod{BooleanLiteral}{
190  \code{True} \code{False}
191}
192
193\subsection{Integer Literals}
194
195Integer literals are digit sequences.  They can be written in decimal, hexadecimal. or octal form.
196
197\vspace{2ex}
198\indprod{IntegerLiteral}{
199  DecDigits
200  
201  \code{0x} HexDigits
202  
203  \code{0X} HexDigits
204
205  \code{0o} OctDigits
206
207  \code{0O} OctDigits
208}
209
210\indinfprod{DecDigits}{
211  A sequence of decimal digits 0-9.
212}
213
214\indinfprod{HexDigits}{
215  A sequence of hexadecimal digits 0-9, A-F.
216}
217
218\indinfprod{OctDigits}{
219  A sequence of octal digits 0-7.
220}
221
222
223\subsection{Character Literals}
224
225A character literal consists of a quoted character.
226
227\vspace{2ex}
228\indinfprod{CharacterLiteral}{
229   A character enclosed in single quotes.
230}
231
232
233The type of a character literal is \code{U8} (see below), which corresponds to a single byte.
234Syntactically, a character literal can be specified as in Haskell (see the Haskell Report), i.e.,
235full Unicode and several escape sequences (such as \code{\\n}) are supported. However, a valid
236character literal in \cogent must always correspond to a code value in the range 0..255.
237
238Examples for valid character literals are \code{'h'}, \code{'8'}, and \code{'/'}. The quoted character
239\code{'}\verb|\|\code{300'} is not a legal character literal since it is mapped to code 300.
240
241
242\subsection{String Literals}
243
244A string literal consists of a quoted character sequence.
245
246\vspace{2ex}
247\indinfprod{StringLiteral}{
248   A sequence of characters enclosed in double quotes.
249}
250
251
252Syntactically a string literal can be specified as in Haskell (see the Haskell Report). The same
253escape sequences as for character literals are supported for specifying every character.
254For a valid \cogent string literal every character must be mapped to a code in the range 0..255.
255
256An example for a valid string literal is the string \code{"This is a string literal}\verb|\|\code{n"}. Again,
257the string \code{"String containing a} \verb|\|\code{300 glyph"} is not legal, since it contains a character
258mapped to code 300.
259
260
261\chapter{Types}
262
263The \cogent language is a strongly typed language, where every term and variable in a program has a 
264specific type. Like some other strongly typed languages, such as Scala and several functional languages
265types are often automatically inferred and need not be specified explicitly.  Although possible
266in many cases, \cogent never infers types for toplevel definitions (see Section~\ref{toplevel-def}), they
267must always be specified explicitly.
268
269In most typed programming languages a type only determines a set of values and the operations which 
270can be applied to these values. As a main feature of \cogent, types are extended to also represent 
271the way how values can be used in a program.
272
273\section{Type Basics}
274
275We will first look at the basic features of the \cogent type system, which are similar to those of types
276in most other programming languages. There are some predefined \textit{primitive} types and there
277are ways to construct \textit{composite} types from existing types.
278
279Note that in \cogent types can always be specified (e.g.~for variables or function arguments) by 
280arbitrarily complex \textit{type expressions}. It is possible to use a \textit{type definition} 
281to give a type a name, but it is not necessary to do so. In particular, types are matched by structural 
282equality, hence if the same type expression is specified in different places it means the same type.
283
284The general syntactical levels of type expressions are as follows:
285
286\vspace{2ex}
287\indprod{MonoType}{
288  TypeA1
289  
290  \ldots
291}
292
293\indprod{TypeA1}{
294  TypeA2
295  
296  \ldots
297}
298
299\indprod{TypeA2}{
300  AtomType
301
302  \ldots
303}
304
305\indprod{AtomType}{
306  \code{(} MonoType \code{)}
307  
308  \ldots
309}
310
311By putting an arbitrary \textit{MonoType} expression in parentheses it can be used wherever an \textit{AtomType} is 
312allowed in a type expression.
313
314\subsection{Primitive Types}
315
316Since \cogent is intended as a system programming language, the predefined primitive types are mainly bitstring types.
317Additionally, there is a type for boolean values and an auxiliary string type.
318
319Syntactically, a primitive type is specified as a nullary type constructor:
320
321\vspace{2ex}
322\indprod{AtomType}{
323  \code{(} MonoType \code{)}
324  
325  TypeConstructor
326  
327  \ldots
328}
329
330\indprod{TypeConstructor}{
331  CapitalizedId
332}
333
334\subsubsection{Bitstring Types}
335
336The bitstring types are named \code{U8}, \code{U16}, \code{U32}, and \code{U64}.
337%, and \code{Char}.
338They denote strings of 8, 16, 32, or 64 bits, respectively.
339%, the type \code{Char} is a synonym for \code{U8}.
340
341The usual bitstring operations can be applied to values of the bitstring types, such as bitwise boolean 
342operations and shifting. Alternatively, bitstring values can be interpreted as unsigned binary represented
343numbers and the corresponding numerical operations can be applied. All numerical operations are done modulo the
344first value that is no more included in the corresponding type. E.g., numerical operations for values of
345type \code{U8} are done modulo $2^8 = 256$.
346
347\todo{application of \&\& and ||?}
348
349\subsubsection{Other Primitive Types}
350
351The other primitive types are \code{Bool} and \code{String}. Type \code{Bool} has the two values \code{True}
352and \code{False} with the boolean operations. Type \code{String} can only be used for specifying string literals,
353it supports no operations.
354
355
356
357\subsection{Composite Types}
358
359There are the following possibilities to construct composite types: records, tuples, variants, and functions. 
360
361\subsubsection{Tuple Types}
362
363A tuple type represents mathematical tuples, i.e., values with a fixed number of fields specified in a certain order. Every field may have a different type.
364A tuple type expression has the syntax:
365
366\vspace{2ex}
367\indprod{AtomType}{
368  \code{(} MonoType \code{)}
369  
370  TypeConstructor
371  
372  TupleType
373
374  \ldots
375}
376
377\indprod{TupleType}{
378  \code{()}
379
380  \code{(} MonoType \code{,} MonoType \{\code{,} MonoType\} \code{)}
381}
382
383The empty tuple type \code{()} is also called the \textit{unit} type. It has the empty tuple as its only value.
384
385Note that there is no 1-tuple type, a tuple type must either be the unit type or have at least two fields. Conceptually,
386a 1-tuple is equivalent to its single field. Syntactically the form \code{(} \textit{MonoType} \code{)} is used for
387grouping. 
388
389The type expression \code{(U8, U16, U16)} is an example for a tuple type with three fields.
390
391%Tuple types in \cogent are right associative: If the rightmost field in a tuple type T again has a tuple type, the type T is equivalent 
392%to the flattened type where the rightmost field is replaced by the fields according to its type. As an example, all the following types are equivalent:
393%\begin{verbatim}
394%  (U8, (U16, U16), (U8, Bool, U32))
395%  (U8, (U16, U16), U8, (Bool, U32))
396%  (U8, (U16, U16), U8, Bool, U32)
397%  (U8, (U16, U16), (U8, (Bool, U32)))
398%  (U8, ((U16, U16), U8, Bool, U32))
399%\end{verbatim}
400
401\subsubsection{Record Types}
402
403A record type is similar to a C struct, or a Haskell data type in record syntax. It consist of
404arbitrary many \textit{fields}, where each field has a name and a type. Accordingly, a record type expression
405has the following syntax:
406
407\vspace{2ex}
408\indprod{AtomType}{
409  \code{(} MonoType \code{)}
410  
411  TypeConstructor
412  
413  TupleType
414
415  RecordType
416  
417  \ldots
418}
419
420\indprod{RecordType}{
421  \code{\{} FieldName \code{:} MonoType \{\code{,} FieldName \code{:} MonoType\} \code{\}}
422}
423
424\indprod{FieldName}{
425  LowercaseId
426}
427
428The fields in a record type are order-sensitive. Therefore, the type expressions \code{\{a: U8, b: U16\}} and
429\code{\{b: U16, a: U8\}} denote different types.  A record type must always have atleast one field. 
430Other than for tuples, a record type may have a single field.
431Therefore, the type expressions \code{\{a: U8\}} and \code{U8} denote different types.
432
433\subsubsection{Variant Types}
434
435A variant type is similar to a union in C, or an algebraic data type in Haskell. As in Haskell, and
436unlike in C, a variant type is a \textit{discriminated} union:  each value is tagged with 
437the alternative it belongs to. 
438
439Depending on the tag, every value may have a ``payload'' which is a sequence of values, as in a tuple.
440A variant type specifies for every alternative the tag and the types of the payload values,  hence it has the syntax:
441
442\vspace{2ex}
443\indprod{AtomType}{
444  \code{(} MonoType \code{)}
445  
446  TypeConstructor
447  
448  TupleType
449
450  RecordType
451  
452  VariantType
453
454  \ldots
455}
456
457\indprod{VariantType}{
458   \code{<} DataConstructor \{TypeA2\} \{\code{|} DataConstructor \{TypeA2\}\} \code{>}
459}
460
461\indprod{DataConstructor}{
462  CapitalizedId
463}
464
465The tags are given by the \textit{DataConstructor} elements.  Since the payload is a sequence of values the 
466ordering of the \textit{TypeA2} matters. 
467
468The type expression \code{<Small U8 | Large U32>} is an example for a variant type with two alternatives, where the 
469payloads are single values of type \code{U8} and \code{U32}, respectively. Typical applications
470of variant types are for modelling error cases, such as in 
471
472\begin{verbatim}
473  <Ok U16 U32 U8 | Error U8>
474\end{verbatim}
475
476or for modelling optional values, such as in
477
478\begin{verbatim}
479  <Some U16 | None>
480\end{verbatim}
481
482 
483Although \textit{DataConstructor}s and \textit{TypeConstructor}s have the same syntax, they constitute different namespaces.
484A \textit{CapitalizedId} can be used to denote a \textit{DataConstructor} and a \textit{TypeConstructor} in the same
485context. In the example
486\begin{verbatim}
487  <Int U32 | Bool U8>
488\end{verbatim}
489the name of the predefined primitive type \code{Bool} is also used as a tag in a variant type.
490
491\subsubsection{Function Types}
492
493A function type corresponds to the the usual concept of function types in functional programming languages, as it is even 
494available in C. A function type has the syntax:
495
496\vspace{2ex}
497\indprod{MonoType}{
498  TypeA1
499  
500  FunctionType
501}
502
503\indprod{FunctionType}{
504   TypeA1 \code{->} TypeA1 
505}
506
507A function with type \code{U8 -> U16} maps values of type \code{U8} to values of type \code{U16}.
508
509Note, that a \textit{TypeA1}  cannot be a function type. Hence, to specify a higher order function type in \cogent, which 
510takes a function as argument or returns a functions as result, the argument or result type must be put in parentheses.
511
512In particular, the type expression \code{U8 -> U8 -> U16}, which is the usual way of specifying the type of a binary function in Haskell through
513currying, is illegal in \cogent. Strictly speaking, function types always describe unary functions. To specify the corresponding type 
514in \cogent use \code{U8 -> (U8 -> U16)}. Alternatively, a type expression for a binary function can
515be specified as \code{(U8,U8) -> U16} in \cogent, which is a different type.
516
517\subsection{Type Definitions}
518\label{def-type}
519
520Although all types in \cogent could be denoted by type expressions, types can be named by specifying a 
521\textit{type definition}. In the simplest case, a type definition introduces a name for a type expression,
522such as in the following example:
523
524\begin{verbatim}
525  type Fract = { num: U32, denom: U32 }
526\end{verbatim}
527
528Syntactically a type name is a \textit{TypeConstructor} in the same way as the primitive types. Hence, the 
529primitive types can be considered to be specific ``predefined'' type names.
530
531A type name defined in a type definition may be used in type expressions after the definition but also in type
532expressions occurring \textit{before} the type definition. In this way type definitions are ``global'', the 
533defined type names can be used everywhere in the \cogent program,  also in and from included files.
534
535An important restriction of \cogent is that type definitions may not be recursive, i.e., the type name may
536not occur in the type expression on the right-hand side. Thus the following type definition is illegal
537\begin{verbatim}
538  type Numbers = <Single U32 | Sequ (U32, Numbers)>
539\end{verbatim}
540because the defined type name \code{Numbers} occurs in the type expression. Also there may not be an indirect
541recursion, where type definitions refer to each other cyclically.
542
543\subsubsection{Generic Types}
544
545In a type definition it is also possible to define a \textit{TypeConstructor} which takes one or more
546\textit{type parameters}. Such a \textit{TypeConstructor} is called a \textit{generic} type. 
547An example would be
548
549\begin{verbatim}
550  type Pair a = (a,a)
551\end{verbatim}
552
553Here, the \textit{TypeConstructor} \code{Pair} is generic, it has the single type parameter \code{a}.
554
555In fact, a generic type like \code{Pair} is not really a type, it is a type \textit{constructor}. Only when it
556is applied to type \textit{arguments}, such as in \texttt{Pair U32}, it yields a type. Such a type is called
557a \textit{parameterized type}. Every generic type has a fixed \textit{arity}, which is its number of type
558parameters and specifies the number of arguments required in parameterized types constructed from it.
559
560A \textit{TypeConstructor} is non-generic, if it has arity 0. In this special case, the \textit{TypeConstructor}
561itself already denotes a type.
562
563Generic types in \cogent are known in Haskell as ``polymorphic types'' and similar concepts can be found in 
564several other programming languages. In Java, a generic class definition has the form \texttt{class Pair<A> \{\ldots\}}, 
565it defines the generic class \code{Pair} with its type parameter \code{A}. In C++ a similar concept is
566supported by ``templates''.
567
568The syntax for a type definition in \cogent supports both generic and non-generic types:
569
570\vspace{2ex}
571\indprod{TypeDefinition}{
572  \code{type} TypeConstructor \{TypeVariable\} \code{=} MonoType
573  
574  \ldots
575}
576
577\indprod{TypeVariable}{
578  LowercaseId
579}
580
581A \textit{TypeConstructor} defined this way is also called a \textit{type synonym}, since as a type expression
582it is strictly equivalent to the expression on the right-hand side in the definition. A type synonym with
583arity 0 is called a \textit{type name}.
584
585In the definition of a generic type, the type parameters may occur in the \textit{MonoType} on the right-hand side.
586There they are called \textit{type variables} and a type expression containing type variables is 
587called a \textit{polymorphic type}. To support polymorphic type expressions, the syntax allows type variables as
588\textit{AtomType}:
589
590\vspace{2ex}
591\indprod{AtomType}{
592  TypeConstructor
593  
594  TupleType
595
596  RecordType
597  
598  VariantType
599
600  TypeVariable
601}
602
603
604As in Haskell there is no syntactic difference between type variables and normal (term) variables. 
605However, type variables are syntactically different from type constructors, since the latter are capitalized identifiers,
606whereas variables begin with a lowercase letter.
607
608Since type variables are allowed as \textit{AtomType}, they can occur in a polymorphic type expression in all places
609where a type is allowed. 
610
611Note that in the definition of a generic type, all type variables occurring in the type expression on the right-hand
612side must be type parameters, declared on the left-hand side, i.e., they must all be bound in the type definition. 
613 The other way round, a type parameter need not occur as type variable in the type expression. In Haskell, this
614is called a ``phantom type''. Other than in Haskell in Cogent these types are not checked by the type checker, hence for
615
616\begin{verbatim}
617  type A a = U8
618\end{verbatim}
619the types \code{A U16} and \code{A Bool} are equivalent.
620
621
622Parameterized types are simply denoted by the generic type constructor followed by the required number of
623type expressions as arguments, such as in
624
625\begin{verbatim}
626  Pair U32
627\end{verbatim}
628
629They can be used in type expressions as \textit{TypeA1}:
630
631\vspace{2ex}
632\indprod{TypeA1}{
633  TypeA2
634  
635  ParameterizedType
636  
637  \ldots
638}
639
640\indprod{ParameterizedType}{
641  TypeConstructor \{TypeA2\}
642}
643
644Note that parameterized types must be put in parentheses if they are nested (used as argument of another parameterized type). 
645
646\subsubsection{Expanding Type Expressions}
647
648We call a parameterized type with a type synonym as \textit{TypeConstructor} a \textit{parameterized type synonym}.
649
650Since type definitions may not be recursive, type synonyms can always be eliminated from type expressions 
651by substituting the defining type expression for them, putting it in parentheses if necessary. 
652
653In the case of a parameterized type synonym also the type variables are 
654substituted by the actual type arguments. We call the result of eliminating (transitively) all type synonyms
655from a type expression the \textit{expansion} of the type expression.
656
657\subsubsection{Abstract Types}
658
659An \textit{abstract} type is similar to a type synonym without a definition. The idea of abstract types in \cogent is
660to provide the actual definition externally in accompanying C code. Hence abstract types are the \cogent way of
661interfacing C type definitions. However, since abstract types are used in \cogent in an opaque way, it is not necessary
662to know the external C definition for working with an abstract type in \cogent.  Note that abstract types are not
663meant to be used as interfaces to or abstractions of other \cogent types.
664
665Abstract types can be generic, i.e., they may have type parameters. The names of these type parameters are irrelevant,
666since there is no definition where they could occur as type variables. They are only used to specify the arity of
667the generic abstract type.
668
669The syntax for defining abstract types is the same as for normal type definitions, with the defining type expression 
670omitted:
671
672\vspace{2ex}
673\indprod{TypeDefinition}{
674  \code{type} TypeConstructor \{TypeVariable\} \code{=} MonoType
675  
676  AbstractTypeDefinition
677}
678
679\indprod{AbstractTypeDefinition}{
680  \code{type} TypeConstructor \{TypeVariable\}
681}
682
683The following examples define two abstract types. Type \code{Buffer} is non-generic, type \code{Array} is generic
684with arity 1:
685
686\begin{verbatim}
687  type Buffer
688  type Array a
689\end{verbatim}
690
691Like generic type synonyms, generic abstract types can be used to construct parameterized types:
692
693\begin{verbatim}
694  Array U16
695\end{verbatim}
696
697We call a parameterized type with an abstract type as its \textit{TypeConstructor} a \textit{parameterized abstract type}.
698Note that abstract types cannot be eliminated by expanding a type expression, since they have no definition.
699
700\section{Restricted Types}
701
702A type semantically determines a set of values as its extension. In most other typed programming languages the main
703consequence is that the type of a value restricts the functions which can be applied to it. 
704
705A specific feature of \cogent is that the type may impose additional restrictions on the ways a value can be used 
706in the program, in particular, how \textit{often} it may be used. This concept is known as \textit{linear types},
707it is also present in some other special programming languages, e.g., in Rust.
708
709Many types in \cogent do not impose additional restrictions, they behave like types in other programming languages,
710we call them \textit{regular types}. Types with additional restrictions are called \textit{restricted types}.
711
712\subsection{Linear Types}
713
714One kind of restricted types are \textit{linear types}. A linear type has the specific property, that its values must 
715be used \textit{exactly once} in the program. What this means is explained in Section~\ref{expr-usage}. Here it is 
716only relevant that a type may be linear or not.
717
718Linearity is an inherent property of type expressions. Type expressions as they have been described until now can either
719be linear or regular. To determine whether a type expression is linear or regular 
720its expansion is inspected using the following rules:
721\begin{itemize}
722\item Primitive types are regular.
723\item Record types are linear.
724\item Tuple types are linear if they contain at least one field with a linear type.
725\item Variant types are linear if they contain at least one alternative with a payload of linear type.
726\item Function types are regular.
727\item Parameterized and non-generic abstract types are linear.
728\end{itemize}
729
730Together, a type is linear when, after expanding all type synonyms, it has a component of a record or abstract type
731which does not appear as part of a function type.
732
733\subsection{Boxed and Unboxed Types}
734
735In order to decouple the property of linearity somewhat from the way how types are composed, the concept of 
736\textit{unboxed types} is used. Record types and abstract types, which may cause a type to be linear, are
737called \textit{boxed types}, the other types (primitive, tuple, variant, and function) are called \textit{unboxed types}.
738
739The type system is expanded by introducing the unbox type operator \code{\#}. For boxed types it produces
740an unboxed version. By applying the unbox type operator to all record types and abstract types
741in a type expression, the type expression becomes regular. 
742
743The operator \code{\#} is applied to a type expression as a prefix. To simplify the syntax it is allowed to
744be applied to arbitrary \textit{AtomType} expressions:
745
746\vspace{2ex}
747\indprod{TypeA2}{
748  AtomType
749
750  \code{\#} AtomType
751  
752  \ldots
753}
754
755By putting an arbitrary \textit{MonoType} in parentheses, the unbox operator can be applied to it, as in \code{\#(Array U8)}.
756
757If the unbox operator is applied to an \textit{AtomType} which is already unboxed, it has no effect. Hence, the type
758expressions \code{(U8,U16)} and \code{\#(U8,U16)} denote the same type, whereas \code{\{fld1:U8,fld2:U16\}} and
759\code{\#\{fld1:U8,fld2:U16\}} denote different types. 
760
761When applied to a record, the unbox operator affects only the record itself, not its fields. Hence, an unboxed
762record is still linear, if it has linear fields. The additional linearity rules for types resulting from 
763applying the unbox operator are
764\begin{itemize}
765\item Unboxed record types are linear if they contain at least one field with a linear type.
766\item Unboxed non-generic or parameterized abstract types are regular.
767\item For all other cases, an unboxed type is linear or regular according to the linearity of the type expression to which
768the unbox operator is applied.
769\end{itemize}
770
771As an example, if \code{A} is a non-generic abstract type, the type expression \code{\#(U8,A)} is linear, since
772the linear second field makes the type expression \code{(U8,A)} linear.
773
774\subsection{Partial Record Types}
775
776Since record types are linear, their values must be used exactly once, which also uses all their linear fields. 
777To support more flexibility, \cogent allows
778using linear record fields independently from the record itself, although each of them must still be used exactly once.
779This is done by separating the linear field's value from the rest of the record. The fact that the field value is no more present
780in the remaining record is reflected by the remaining record having a different type. These types are called 
781\textit{partial record types}. A record field for which the value is not present is called a \textit{taken} field.
782
783A partial record type is denoted by specifying a record type together with the names of the taken fields using the 
784following syntax:
785
786\vspace{2ex}
787\indprod{TypeA1}{
788  TypeA2
789  
790  TypeConstructor \{TypeA2\}
791  
792  PartialRecordType
793}
794
795\indprod{PartialRecordType}{
796  TypeA2 TakePut TakePutFields
797}
798
799\indselprod{TakePut}{
800  \code{take} \code{put}
801}
802
803\indprod{TakePutFields}{
804  FieldName
805
806  \code{(} [FieldName \{\code{,} FieldName\}] \code{)}
807  
808  \code{( .. )}
809}
810
811Thus \code{take} and \code{put} together with field names constitute type operators. The result of applying these
812type operators is usually a partial record type.
813
814When applied to a type R the operator \code{take (v,w)} produces the record type where at least fields
815\code{v} and \code{w} are taken, in addition to the fields that have already been taken in R.
816If the fields \code{v} and \code{w} are already taken in R, the compiler produces a warning. If R has no such fields
817 then applying the take operator is illegal. 
818
819The operator \code{put (v,w)} is dual to the take operator, it produces the record type where at least the fields 
820\code{v} and \code{w} are \textit{not} taken, in addition to the fields that have not been taken in R.
821If the fields \code{v} and \code{w} are not taken in R, the compiler produces a warning. If R has no such fields
822 then applying the put operator is illegal.
823
824The operator \code{take ( .. )} produces a record type where all fields are taken, the operator \code{put ( .. )} 
825produces the record type where no field is taken. Applying it to a type which is not a (boxed or unboxed) record type
826is illegal.
827
828If a take or put operator is applied to a boxed record type the result is again boxed, if applied to an unboxed record type
829the result is unboxed.
830
831Consider the following examples:
832\begin{verbatim}
833  type A
834  type B
835  type C
836  type R1 = {fld1: A, fld2: U8, fld3: B, fld4: C}
837  type R2 = R1 take fld1
838  type R3 = R1 take ( .. )
839\end{verbatim}
840
841Types \code{R1, R2, R3} are all boxed and thus linear. The type expressions 
842\begin{verbatim}
843  R1 take (fld1, fld2)
844  R2 take (fld1, fld2)
845  R2 take fld2
846  R3 put (fld3, fld4)
847\end{verbatim}
848are all equivalent. The type expressions \code{R3 put ( .. )} and \code{R2 put ( .. )} are both equivalent to
849type \code{R1}.
850
851An unboxed record type without linear fields is regular. The same holds for unboxed partial record types if all
852linear fields are taken. Thus the additional linearity rules for partial record types are
853\begin{itemize}
854\item Partial boxed record types are linear.
855\item Partial unboxed record types are linear if they contain at least one nontaken field with a linear type.
856\end{itemize}
857
858\subsection{Readonly Types}
859
860Since the restrictions for using values of a linear type are rather strong, \cogent supports an additional kind
861of types, the \textit{readonly types}. The use of values of a readonly type is also restricted, however, in a
862different way: they can be used any number of times but they may not be modified. Again,
863the meaning of this is explained in Section~\ref{expr-usage}.
864
865\subsubsection{The bang Operator}
866
867All type expressions defined until now are not readonly. The only way to construct a readonly type is by applying
868the type operator \code{!}, which is pronounced ``bang''. This operator may be applied to an \textit{AtomType} 
869in postfix notation:
870
871\vspace{2ex}
872\indprod{TypeA2}{
873  AtomType
874
875  \code{\#} AtomType
876
877  AtomType \code{!}
878}
879
880By putting an arbitrary \textit{MonoType} in parentheses the bang operator can be applied to it.
881
882Readonly types are considered as an alternative to linear types, hence regular types are never readonly: If the 
883bang operator is applied to a regular type A the resulting type is equivalent to A. Only if the bang operator
884is applied to a linear type a readonly type may result.
885
886Unlike the unbox operator the bang operator also affects subexpressions such as record fields and abstract types. If in
887type A a field has type F then in type A! the same field has type F!. An exception are function types: if a bang
888operator is applied to a function type it is not applied to argument and result types. 
889As a result of this recursive application of the bang operator, it turns every linear type into a non-linear type. 
890
891\subsubsection{Escape-restricted Types}
892
893A concept related to readonly types are \textit{escape-restricted types}. A type is escape-restricted if it is readonly
894or if it has an escape-restricted component. This definition implies, that readonly types are always escape-restricted. The opposite
895is not true, there are escape-restricted types which are not readonly. An example is the type
896\begin{verbatim}
897  #{fld1: U8, fld2: {f1: U16}! }
898\end{verbatim}
899It is not readonly since the bang operator is not applied to it. However, it has the field \code{fld2}
900with a readonly type, therefore it is escape-restricted.
901
902We call a type which is not escape-restricted an ``escapable'' type.
903
904A linear type always is a boxed record or abstract type or it contains a component of such a type. When the bang
905operator is applied to the linear type, it will recursively be applied to that component, turning it into a 
906component of readonly type. Therefore, the result of applying the bang operator to a linear type will always
907be an escape-restricted type which is not linear.
908
909There are even types which are linear and escape-restricted, such as the boxed record type
910\begin{verbatim}
911  {fld1: U8, fld2: {f1: U16}! }
912\end{verbatim}
913or the unboxed record with a field of linear type and a field of readonly type:
914\begin{verbatim}
915  #{fld1: {f1: U16}, fld2: {f1: U16}! }
916\end{verbatim}
917
918If all escape-restricted fields are taken from a record, the resulting partial record type is escapable.
919An example is the type
920\begin{verbatim}
921  {fld1: U8, fld2: {f1: U16}! } take (fld2)
922\end{verbatim}
923
924As the other restricted types, escape-restricted types impose additional restrictions on the use of their values: they 
925may not ``escape'' from certain context. Again, the meaning of this is explained in Section~\ref{expr-usage}.
926
927Together we have the following properties for type expressions: A type expression can be regular or restricted. If it is restricted 
928it can be linear, escape-restricted, or both. A readonly type is always escape-restricted but never linear.
929
930\chapter{Working with Values}
931
932The main part of a \cogent program is usually about specifying values, typically the result values of functions, depending on argument values. 
933
934\section{Patterns}
935
936Functional programming languages typically use the concept of \textit{pattern matching}, which covers several concepts from imperative or 
937object oriented languages, such as binding values to variables, accessing components of a value, or testing for alternatives. In \cogent 
938patterns are the most important language construct for working with composite values.
939
940A pattern is a syntactical language construct which can be \textit{matched} against values. A pattern may contain \textit{variables}, 
941then matching it with a value has the effect of \textit{binding} the contained variables to components of the matched value. In \cogent, 
942 as  in languages like Haskell or Scala, a variable may occur atmost once in a pattern. Hence it is not possible to construct patterns 
943which restrict matching values to have some parts which are equal to each other.
944
945A pattern \textit{conforms} to a type, if it matches at least one value of the type. A pattern can conform to several different types. 
946A pattern is \textit{irrefutable}, if it matches all values of all its conforming types. Irrefutable patterns cannot be used to discriminate 
947between different sets of values, they can only be used to bind contained variables.  If a pattern is matched
948against a value, the match must always be exhaustive, i.e. alternative patterns must be specified which together cover
949the value's type. 
950
951The conforming types of a pattern can always be inferred from the syntactical structure of the pattern. Therefore type expressions are not
952needed as part of patterns.
953
954Syntactically, patterns may be put in parentheses for grouping:
955
956\vspace{2ex}
957\indprod{Pattern}{
958  \code{(} Pattern \code{)}
959  
960  \ldots
961}
962
963A pattern in parentheses is equivalent to the pattern itself.
964
965\subsection{Simple Patterns}
966
967The simplest patterns consist of only one part. They may be irrefutable or not.
968
969\subsubsection{Irrefutable Simple Patterns}
970
971A simple pattern can be a single variable or the special symbol \code{\_}, which is called the \textit{wildcard} pattern:
972
973\vspace{2ex}
974\indprod{Pattern}{
975  \code{(} Pattern \code{)}
976  
977  IrrefutablePattern
978
979  \ldots
980}
981
982\indprod{IrrefutablePattern}{
983  Variable 
984  
985  WildcardPattern
986  
987  \ldots
988}
989
990\indprod{Variable}{
991  LowercaseId
992}
993
994\indprod{WildcardPattern}{
995  \code{\_}
996}
997
998Both patterns conform to all types and are irrefutable, hence they match every possible value which may occur in \cogent. If a variable \code{x} 
999is matched with a value, the value is bound to \code{x}. This means that in a certain \textit{scope}, the value can be referenced by denoting 
1000the variable \code{x}.
1001
1002The \textit{WildcardPattern} \code{\_} contains no variable, therefore no variable can be bound when the pattern is matched with a value. 
1003The \textit{WildcardPattern} is used when, for some reason, a value must be matched with a pattern but need not be referenced afterwards.
1004
1005As for type and data constructors, the syntactically equal \textit{Variable}s, \textit{FieldName}s, and \textit{TypeVariables}
1006constitute three different namespaces. The same lowercase identifier can be used to denote a term variable, a type variable, and
1007a record field in the same context without imposing any relation among them.
1008
1009\subsubsection{Refutable Simple Patterns}
1010
1011Refutable simple patterns consist of a single literal:
1012
1013\vspace{2ex}
1014\indprod{Pattern}{
1015  \code{(} Pattern \code{)}
1016  
1017  IrrefutablePattern
1018
1019  LiteralPattern
1020  
1021  \ldots
1022}
1023
1024\indprod{LiteralPattern}{
1025  BooleanLiteral
1026  
1027  IntegerLiteral
1028  
1029  CharacterLiteral
1030}
1031
1032A \textit{LiteralPattern} matches exactly one value, the value which is denoted by the literal. A \textit{BooleanLiteral}  conforms only to type
1033\code{Bool}, a \textit{CharacterLiteral} conforms  only to type \code{U8}.
1034
1035An \textit{IntegerLiteral} conforms to every bitstring type which includes the value denoted by the literal. For example, the literal 100000 
1036conforms to types \code{U32} and \code{U64} but not to \code{U16} or \code{U8}.
1037
1038Since a \textit{LiteralPattern} contains no variables, no variable can be bound when it is matched with a value. \textit{LiteralPattern}s are used 
1039for discriminating the value from other values, not for binding variables.
1040
1041
1042Note that you cannot use a value bound to a variable like a literal in a pattern to match just that value. If a variable
1043occurs in a pattern it is always used for a new binding which shadows any value already bound to it. In particular,
1044this applies to variables bound in a topevel value definition (see Section~\ref{value-def}).
1045
1046
1047\subsection{Composite Patterns}
1048
1049Composite patterns conform to composite types. However, there are no patterns which conform to function types.
1050
1051\subsubsection{Tuple Patterns}
1052
1053A tuple pattern is syntactically denoted by a tuple of patterns:
1054
1055\vspace{2ex}
1056\indprod{IrrefutablePattern}{
1057  Variable 
1058  
1059  WildcardPattern
1060  
1061  TuplePattern
1062  
1063  \ldots
1064}
1065
1066\indprod{TuplePattern}{
1067  \code{()}
1068
1069  \code{(} IrrefutablePattern \code{,} IrrefutablePattern \{\code{,} IrrefutablePattern\} \code{)}
1070}
1071
1072The subpatterns in a tuple pattern must all be irrefutable. As a consequence, tuple patterns are also irrefutable.
1073Even the tupel pattern \code{()} is irrefutable, although it matches only a single value. Since it conforms only to the
1074unit type which has only this single value, it satisfies the requirements for an irrefutable pattern.
1075
1076Note that, as for tuple types, there is no tuple pattern with only one subpattern, the corresponding syntactical 
1077construct like \code{(v)} is a pattern in parentheses and conforms to all types the inner pattern conforms to, not
1078only to tuple types.
1079
1080A tuple pattern \code{(}$p_1$, \ldots, $p_n$\code{)} with $n \neq 1$ conforms to every tuple type with $n$ fields where each subpattern
1081$p_i$ conforms to the type of the $i$th field. 
1082
1083%Since tuple types are right associative, the pattern also conforms to all
1084%tuple types with more than $n$ fields, if the rightmost pattern $p_n$ conforms to the tuple type built from the remaining fields
1085%starting with the $n$th field.
1086
1087If a tuple pattern is matched with a value, the subpatterns are matched with the corresponding fields of the value. 
1088
1089%If the value 
1090%has more fields, subpattern $p_n$ is matched with the tuple of all remaining fields. 
1091
1092A useful case is a tuple pattern where all 
1093subpatterns are (distinct) variables. Such a pattern can be used to bind all fields of a tuple value to variables for subsequent access.
1094
1095Here are some examples for tuple patterns:
1096\begin{verbatim}
1097  (v1, v2, v3)
1098  (v1, (v21, v22), _)
1099  ()
1100\end{verbatim}
1101The first pattern conforms to all tupel types with three fields. The second pattern conforms to all tuple types with
1102three fields where the second field has a tuple type with two fields. The third pattern only conforms to the unit type.
1103
1104\subsubsection{Record Patterns}
1105\label{pat-rec}
1106
1107Patterns for record values exist in two syntactical variants, depending on whether the record is boxed or unboxed:
1108
1109\vspace{2ex}
1110\indprod{IrrefutablePattern}{
1111  Variable 
1112  
1113  WildcardPattern
1114
1115  TuplePattern
1116  
1117  RecordPattern
1118}
1119
1120\indprod{RecordPattern}{
1121  Variable \code{\{} RecordMatchings \code{\}}
1122  
1123  \code{\#} \code{\{} RecordMatchings \code{\}}
1124}
1125
1126The main part \textit{RecordMatchings} of a record pattern is used to match the fields and has the following syntax:
1127
1128\vspace{2ex}
1129\indprod{RecordMatchings}{
1130   RecordMatching \{\code{,} RecordMatching\} 
1131
1132  
1133}
1134
1135\indprod{RecordMatching}{
1136  FieldName [= IrrefutablePattern]
1137}  
1138
1139The basic case is a sequence of field names with associated subpatterns, such as in 
1140\begin{verbatim}
1141  fld1 = v1, fld2 = (v21, v22), fld3 = _
1142\end{verbatim}
1143A record pattern with these \textit{RecordMatchings} conforms to all record types which have atleast three fields named
1144\code{fld1}, \code{fld2}, and \code{fld3}, and where \code{fld2} has a tuple type with two fields. More general, a record pattern 
1145where the \textit{RecordMatchings} consist of pairs of field names and subpatterns conforms to all record types which have atleast the named 
1146(untaken) fields and every subpattern conforms to the corresponding field type. Since all subpatterns must be irrefutable, the record pattern 
1147is irrefutable as well.
1148
1149A special application of a record pattern is to bind field values to local variables which have the same name as the field itself. The effect 
1150is to make the fields of a record value locally accessible using their field names. This can be accomplished for a specific field by matching 
1151a record pattern with a \textit{RecordMatching} of the form \code{fldi = fldi}. Such a \textit{RecordMatching} can be abbreviated by simply 
1152specifying the field name alone: \code{fldi}, for example in the \textit{RecordMatchings}
1153\begin{verbatim}
1154  fld1, fld2 = (v21, v22), fld3, fld4
1155\end{verbatim}
1156Note that since the field name as a variable conforms to all types, the corresponding record patterns conform to all record types which have a 
1157(untaken) field named \code{fldi}, irrespective of the field type.
1158
1159
1160
1161A record pattern starting with \code{\#} conforms only to unboxed record types. When matched with a value,  for every 
1162field according to the value's type a subpattern must be present in the 
1163\textit{RecordMatchings} and is matched to the corresponding field value. 
1164
1165A record pattern starting with a \textit{Variable} conforms  to boxed and unboxed record types. 
1166When matched with a value this variable is bound to the 
1167remaining record after matching the subpatterns in the \textit{RecordMatchings}. 
1168This ``remaining'' record has as its type the type of the 
1169matched value with all fields taken which are matched in the \textit{RecordMatchings}.  Matching
1170a pattern of this kind with a value is called a ``take operation''. 
1171
1172The rationale for this is that boxed record types are 
1173linear and their values must be used exactly once. Matching only some fields would only use these 
1174fields and not the rest, which is not allowed. 
1175Hence the remaining record must also be matched so that it can be used as well. 
1176Even when all linear fields are matched the remaining 
1177record itself is still linear and must be preserved.
1178
1179
1180If value \code{val} has type 
1181\begin{verbatim}
1182  {fld1: U8, fld2: U16, fld3: U32}
1183\end{verbatim}
1184an example take operation would be to match the pattern 
1185\begin{verbatim}
1186  v {fld1 = v1, fld3 = v3}
1187\end{verbatim}
1188with \code{val}. This will bind \code{v1} to the value of the first field, \code{v3} to the value of the
1189third field, and \code{v} to the remaining record of type
1190\begin{verbatim}
1191  {fld1: U8, fld2: U16, fld3: U32} take (fld1,fld3)
1192\end{verbatim}
1193where only the second field is still present.
1194
1195
1196Although the ordering of fields is relevant in a record type expressionm, it is irrelevant in a record pattern. 
1197Therefore the record pattern
1198\code{\#\{fld1 = v1, fld2 = v2\}} conforms to the types
1199
1200\begin{verbatim}
1201  #{fld1: U8, fld2: U16}
1202  #{fld2: U32, fld1: U32}
1203\end{verbatim}
1204and all other unboxed record types which have two fields named \code{fld1} and \code{fld2}.
1205
1206
1207When a field of non-linear type is taken from a (boxed or unboxed) record value, a copy of it could remain
1208in the record and could be taken again. \cogent does not allow this, non-linear fields can also be taken
1209only once. This way it is possible to represent uninitialized fields in a record by specifying the record 
1210type with the corresponding fields being taken.
1211
1212
1213\subsubsection{Variant Patterns}
1214
1215A variant pattern consists of a data constructor and a subpattern for every payload value in the corresponding alternative:
1216
1217\vspace{2ex}
1218\indprod{Pattern}{
1219  \code{(} Pattern \code{)}
1220  
1221  IrrefutablePattern
1222
1223  LiteralPattern
1224  
1225  VariantPattern
1226}
1227
1228\indprod{VariantPattern}{
1229   DataConstructor \{IrrefutablePattern\}
1230}
1231
1232A variant pattern conforms to every variant type which has atleast an alternative with the \textit{DataConstructor} as its tag. Although a variant 
1233pattern matches all values of the type having only that alternative, this is not true for all other conforming types. For those types the pattern 
1234only matches the subset of value sequences which have been constructed with the \textit{DataConstructor} as its discriminating tag. Therefore variant 
1235patterns are always refutable.  As usual, when matched with a value, the match must be exhaustive, specifying
1236a pattern for every alternative. 
1237
1238
1239When a variant pattern is (successfully) matched with a value, the subpatterns are matched with the payload values.
1240
1241
1242The following is an exmple for a variant pattern:
1243
1244\begin{verbatim}
1245  TwoDim x, y
1246\end{verbatim}
1247
1248It conforms, e.g., to the variant type 
1249
1250\begin{verbatim}
1251  <TwoDim U32 U32 | ThreeDim U32 U32 U32>
1252\end{verbatim}
1253and generally to every type with a variant tagged with \code{TwoDim} and having two values. When it is matched 
1254with a value tagged with \code{TwoDim} the first payload value is bound to \code{x} and the second payload value is bound to \code{y}. 
1255The pattern also conforms to the variant type
1256\begin{verbatim}
1257  <TwoDim U32 U32>
1258\end{verbatim}
1259Although it matches all values of this type, it is still a refutable pattern, even if no other variant types with \code{TwoDim}
1260exist in the program.
1261
1262\section{Expressions}
1263
1264As usual in programming languages, an \textit{expression} denotes a way how to calculate a value. The actual calculation of a value according 
1265to an expression is called an \textit{evaluation} of the expression. Since an expression may contain variables which are not bound in the expression 
1266itself (``free variables''), the value obtained by evaluating an expression may depend on the context in which the free variables are bound.
1267
1268Usually, when an expression occurs in a \cogent program, a type may be \textit{inferred} for it. There are several ways to infer an expression's type.
1269The most basic way is to infer its type from its syntactical structure, although there are cases where that is not possible.
1270 If an expression has an
1271inferred type, the value resulting from evaluating the expression always belongs to this type.
1272
1273The general syntactical levels of expressions are as follows:
1274
1275\vspace{2ex}
1276\indprod{Expression}{
1277  BasicExpression
1278
1279  \ldots
1280}
1281
1282\indprod{BasicExpression}{
1283  BasExpr
1284  
1285  \ldots
1286}
1287
1288\indprod{BasExpr}{
1289  Term
1290  
1291  \ldots
1292}
1293
1294\indprod{Term}{
1295  \code{(} Expression \code{)}
1296  
1297  \ldots
1298}
1299
1300Every \textit{Expression} can be used wherever a \textit{Term} is allowed by putting it in parentheses.
1301
1302\subsection{Terms}
1303
1304The simplest expressions are called \textit{terms}. A term specifies a value directly or, for a composite value, by specifying its parts. 
1305
1306A term can be a single variable, denoting the value which has been bound to the variable in the context. 
1307
1308\vspace{2ex}
1309\indprod{Term}{
1310  \code{(} Expression \code{)}
1311  
1312  Variable
1313
1314  \ldots
1315}
1316
1317From the variable alone no type can be inferred. However, a type may be inferred when the variable is bound. Then this type is 
1318also inferred for every occurence of the variable as a term in its scope.
1319
1320\subsubsection{Literal Terms}
1321
1322Terms for values of primitive types are simply the literals:
1323
1324\vspace{2ex}
1325\indprod{Term}{
1326  \code{(} Expression \code{)}
1327  
1328  Variable
1329
1330  LiteralTerm
1331  
1332  \ldots
1333}
1334
1335\indprod{LiteralTerm}{
1336  BooleanLiteral
1337  
1338  IntegerLiteral
1339  
1340  CharacterLiteral
1341  
1342  StringLiteral
1343}
1344
1345The inferred type for a \textit{BooleanLiteral}, a \textit{CharacterLiteral}, or a \textit{StringLiteral} is \code{Bool},
1346\code{U8}, or \code{String}, respectively.
1347The inferred type for a \textit{IntegerLiteral} is the smallest bitstring type covering the value, thus the literal 
1348\code{200} has inferred type \code{U8}, whereas the literal \code{300} has inferred type \code{U16} and \code{100000} has 
1349inferred type \code{U32}.
1350
1351\subsubsection{Terms for Tuple Values}
1352
1353Terms for tuple values are written as in most other programming languages supporting tuples:
1354
1355\vspace{2ex}
1356\indprod{Term}{
1357  \code{(} Expression \code{)}
1358  
1359  Variable
1360
1361  LiteralTerm
1362  
1363  TupleTerm
1364  
1365  \ldots
1366}
1367
1368\indprod{TupleTerm}{
1369  \code{()}
1370  
1371  \code{(} Expression \code{,} Expression \{\code{,} Expression\} \code{)}
1372}
1373
1374Again, as for tuple types and patterns, a single \textit{Expression} in parentheses is not a tuple term but
1375is only syntactically grouped.
1376
1377An example tuple term is
1378\begin{verbatim}
1379  (15, 'x', 42, ("hello", 1024))
1380\end{verbatim}
1381which specifies 4 subexpressions for the fields, separated by commas.
1382
1383The type inferred from the structure of a tuple term is the tuple type with the same number of fields as are present in the term, where
1384the field types are the types inferred for the subexpressions. If one of the subexpressions does not have an
1385inferred type then no type can be inferred from the tuple term's structure.
1386
1387%Since tuple types are right associative, the same holds for the tuple terms. Hence, the example term is equivalent
1388%to the terms
1389%\begin{verbatim}
1390%  (15, 'x', 42, "hello", 1024)
1391%  (15, ('x', (42, ("hello", (1024)))))
1392%\end{verbatim}
1393%but not to the term
1394%\begin{verbatim}
1395%  (15, ('x', 42), "hello", 1024)
1396%\end{verbatim}
1397
1398\subsubsection{Terms for Record Values}
1399
1400\cogent only suppoprts terms for unboxed record values. Boxed record values cannot be specified directly, they must
1401always be created externally in a C program part and passed to \cogent as (part of) a function argument or result.
1402
1403The syntax for terms for unboxed values specifies all field values together with the field names:
1404
1405\vspace{2ex}
1406\indprod{Term}{
1407  \code{(} Expression \code{)}
1408  
1409  Variable
1410
1411  LiteralTerm
1412  
1413  TupleTerm
1414  
1415  RecordTerm
1416  
1417  \ldots
1418}
1419
1420\indprod{RecordTerm}{
1421  \code{\#} \code{\{} RecordAssignments \code{\}}
1422}
1423
1424\indprod{RecordAssignments}{
1425  RecordAssignment \{\code{,} RecordAssignment\}
1426}
1427
1428\indprod{RecordAssignment}{
1429  FieldName [\code{=} Expression]
1430}
1431
1432An example is the record term
1433\begin{verbatim}
1434  #{fld1 = 15, fld2 = 'x', fld3 = 42, fld4 = ("hello", 1024)}
1435\end{verbatim}
1436which specifies 4 subexpressions for the fields, separated by commas. The field names must be pairwise different.
1437As for record types, but other than for record patterns, the order of the field specifications is significant. Hence
1438the term 
1439\begin{verbatim}
1440  #{fld2 = 'x', fld3 = 42, fld1 = 15, fld4 = ("hello", 1024)}
1441\end{verbatim}
1442evaluates to a different value than the first example term.
1443
1444The type inferred from a record type's structure is the unboxed record type with the same number of fields in the same order 
1445as are present in the expression, named according to the names given in the term. The field types are the types inferred
1446for the subexpressions. If a subexpression has no inferred type, no type can be inferred from the record term's structure.
1447
1448\subsubsection{Terms for Values of Variant Types}
1449
1450A term for a value of a variant type specifies the discriminating tag and the actual payload values:
1451
1452\vspace{2ex}
1453\indprod{Term}{
1454  \code{(} Expression \code{)}
1455  
1456  Variable
1457
1458  LiteralTerm
1459  
1460  TupleTerm
1461  
1462  RecordTerm
1463
1464  VariantTerm
1465  
1466  \ldots
1467}
1468
1469\indprod{VariantTerm}{
1470   DataConstructor \{Term\}
1471}
1472
1473Examples for such terms are
1474
1475\begin{verbatim}
1476  Small 42
1477  TwoDim 3 15
1478\end{verbatim}
1479
1480
1481For a \textit{VariantTerm} it is not possible to infer a type from its structure, since there may be several 
1482variant types using the same \textit{DataConstructor}. The \cogent compiler even does not infer the type if there
1483is only one variant type using the \textit{DataConstructor} as tag.
1484
1485\subsubsection{Terms for Values of Function Types}
1486\label{term-lambda}
1487
1488A term for a value of a function type is, as usual, called a \textit{lambda expression}. Often in other programming languages, a lambda
1489expression consists of a body expression and a variable for every argument. In \cogent all functions take only one
1490argument, therefore only one variable is needed. However, more general than a variable, an irrefutable pattern may be 
1491used. Every application of such a function is evaluated by first matching the pattern against the argument value,
1492thus binding all variables contained in the pattern. Then the body expression is evaluated in the context of
1493the bound variables to yield the result.
1494
1495The syntax for lambda expressions is:
1496
1497\vspace{2ex}
1498\indprod{Term}{
1499  \code{(} Expression \code{)}
1500  
1501  Variable
1502
1503  LiteralTerm
1504  
1505  TupleTerm
1506  
1507  RecordTerm
1508
1509  VariantTerm
1510  
1511  LambdaTerm
1512  
1513  \ldots
1514}
1515
1516\indprod{LambdaTerm}{
1517  \code{$\backslash$} IrrefutablePattern [\code{:} MonoType] \code{=>} Expression
1518}
1519
1520Optionally, the argument type may be specified explicitly after the pattern. If no unique conforming type can be inferred for
1521the pattern, the argument type is mandatory.
1522
1523Examples for lambda terms are
1524
1525\begin{verbatim}
1526  \x => (x,x)
1527  \(x,y,z) (U8, U8, Bool) => #{fld1 = y, fld2 = (x,z)}
1528  \(x,y) : (U32,U32) => TwoDim y x
1529\end{verbatim}
1530
1531In the first case the argument type must be known from the context by knowing an inferred type for the lambda term,
1532for example the type \code{U8 -> (U8,U8)}. In the third case the result type must be known from the context by knowing
1533an inferred type for the lambda term, for example the type 
1534
1535\begin{verbatim}
1536  (U32,U32) -> <TwoDim U32 U32 | Error U8>
1537\end{verbatim}
1538
1539
1540The body expression in a lambda term is restricted to not contain any free non-global variables. Non-global variables
1541are variables bound by pattern matching in contrast to \textit{global} variables which are bound by a toplevel definition
1542(see Section~\ref{toplevel-def}).
1543
1544If the body expression of a lambda term has inferred type T2 and the argument type is explicitly specified as T1 then
1545the type inferred from the structure of the \textit{LambdaTerm} is T1 \code{->} T2.
1546
1547\subsection{Basic Expressions}
1548
1549Basic expressions are constructed from terms in several ways, which all correspond semantically to a function application.
1550
1551\subsubsection{Plain Function Application}
1552
1553As is typical for functional programming languages, a value in \cogent can be a function and it can be applied to arguments.
1554
1555As we have seen with function types, in \cogent all functions have only one argument. Hence, an expression for a function application 
1556consists of a term for the function and a second term for the argument:
1557
1558\vspace{2ex}
1559\indprod{BasExpr}{
1560  Term
1561  
1562  FunctionApplication
1563  
1564  \ldots
1565}
1566
1567\indprod{FunctionApplication}{
1568  BasExpr BasExpr
1569}
1570
1571The argument Expression is simply put after the Expression for the function. This is common in functional programming languages, whereas in
1572imperative and object oriented languages (and in mathematics) the argument is usually put in parantheses like in $f(x)$. In Cogent
1573this is allowed, since a \textit{BasExpr} may be an expression in parentheses, but it is not necessary.
1574
1575The syntax here is ambiguous. Several \textit{BasExpr} in a row are interpreted as left associative. Therefore the following
1576two \textit{BasExpr} are equivalent:
1577\begin{verbatim}
1578  f 42 17 4
1579  ((f 42) 17) 4
1580\end{verbatim}
1581
1582If the first \textit{BasExpr} in a \textit{FunctionApplication} has an inferred type it must be a function type T1 \code{->} T2.
1583If the second \textit{BasExpr} has an inferred type it must be equal to T1. The type inferred from the \textit{FunctionApplication}'s
1584structure is type T2.
1585
1586As an example, if the variable \code{f} is bound to a function of type \code{U8 -> U16} then the basic expression
1587\begin{verbatim}
1588  f 42
1589\end{verbatim}
1590is a \textit{FunctionApplication} with a result of type \code{U16}.
1591
1592\subsubsection{Operator Application}
1593
1594In \cogent there is a fixed set of predefined functions. These functions are denoted by \textit{operator symbols} which are syntactically
1595different from variables. In contrast to normal functions, predefined functions may be binary, i.e. take two arguments. Binary 
1596operator applications are written in infix notation:
1597
1598\vspace{2ex}
1599\indprod{BasExpr}{
1600  Term
1601  
1602  FunctionApplication
1603
1604  OperatorApplication
1605  
1606  \ldots
1607}
1608
1609\indprod{OperatorApplication}{
1610  UnaryOP BasExpr
1611  
1612  BasExpr BinaryOp BasExpr
1613}
1614
1615\indselprod{UnaryOp}{
1616  \code{upcast complement not}
1617}
1618
1619\indselprod{BinaryOp}{
1620   \code{o * / \% + - >= > == /= < <= .\&. .\^{}. .|. >{}> <{}< \&\& || \$} 
1621}
1622
1623As usual in most programming languages, the syntax here is ambiguous and operator precedence rules are used
1624for disambiguation. The precedence levels ordered from stronger to weaker binding are:
1625
1626\begin{verbatim}
1627  upcast complement not <plain function application>
1628  o
1629  * / %
1630  + -
1631  < > <= >= == /=
1632  .&.
1633  .^.
1634  .|.
1635  << >>
1636  &&
1637  ||
1638  $
1639\end{verbatim}
1640
1641Note that plain function application is treated like a binary invisible operator, where the first argument is the 
1642applied function and the second argument is the argument to which the function is applied. 
1643
1644When binary operators on the same level are combined they are usually left associative, with the exception of 
1645 \code{o}, \code{\&\&}, \code{||} and \code{\$}  which are right associative and \code{<, >, <=, >=, ==, /=} which 
1646cannot be combined.
1647
1648\todo{describe all operation semantics and inferred types}
1649
1650\subsubsection{Put Expressions}
1651\label{expr-put}
1652
1653A common function used in functional programming languages is the record update function. It takes a record
1654value and returns a new record value where one or more field values differ. In \cogent the 
1655application of this function is restricted: if a field has a linear type, it cannot be replaced, since then
1656its old value would be discarded without being used. In this case the field can only be replaced, when
1657it has been taken in the old value. For this reason the record update function is called the ``put function''
1658in \cogent. For non-linear fields the put function may either put a value into a taken field or replace
1659the value of an untaken field.
1660
1661\cogent supports a \textit{PutExpression} as specific syntax for applying the put function. It specifies the old record value and 
1662a sequence of new field values together with the corresponding field names:
1663
1664\vspace{2ex}
1665\indprod{BasExpr}{
1666  Term
1667  
1668  FunctionApplication
1669
1670  OperatorApplication
1671  
1672  PutExpression
1673  
1674  \ldots
1675}
1676
1677\indprod{PutExpression}{
1678  BasExpr \{ RecordAssignments \}
1679}
1680
1681As an operator the \textit{RecordAssignments} have the same precedence as plain function application and the unary operators.
1682
1683If a type T is inferred for the leading \textit{BasExpr} in a \textit{PutExpression}, T must satisfy the following conditions: it must
1684be a (boxed or unboxed) record type having all fields occuring in the \textit{RecordAssignments}. If such a field has
1685a linear type it must be taken in T. The type inferred from the structure of the \textit{PutExpression} then is\\
1686\hspace*{1cm} T \code{put} \code{(}fld1\code{,}\ldots\code{,}fldn\code{)}\\
1687where fld1,\ldots,fldn are all fields occurring in the \textit{RecordAssignments}.
1688
1689Unlike in a record term, the field order in a \textit{PutExpression} is not significant.
1690
1691If the variable \code{r} is bound to a value of type \code{R} where
1692\begin{verbatim}
1693  typedef A
1694  typedef R = {fld1: A, fld2: U32, fld3: (Bool,U8), fld4: A} 
1695              take (fld3,fld4)
1696\end{verbatim}
1697and variable \code{a} is bound to a value of type \code{A}, then the following are valid put expressions:
1698\begin{verbatim}
1699  r {fld2 = 55, fld3 = (True, 17)}
1700  r {fld4 = a, fld2 = 10000}
1701\end{verbatim}
1702The first expression has inferred type \code{R put (fld2,fld3)} which is equal to the type
1703\begin{verbatim}
1704  {fld1: A, fld2: U32, fld3: (Bool,U8), fld4: A} take (fld4)
1705\end{verbatim}
1706The expression \code{r \{fld1 = a\}} is invalid since field \code{fld1} is untaken and has linear type.
1707
1708\subsubsection{Member Access}
1709
1710A second function commonly provided for records is \textit{member access} or projection, often denoted by a separating dot
1711in programming languages. \cogent provides the same syntax for member access:
1712
1713\vspace{2ex}
1714\indprod{BasExpr}{
1715  Term
1716  
1717  FunctionApplication
1718
1719  OperatorApplication
1720  
1721  PutExpression
1722  
1723  MemberAccess
1724}
1725
1726\indprod{MemberAccess}{
1727  BasExpr \code{.} FieldName
1728}
1729
1730Here, the \textit{BasExpr} specifies the record value and the \textit{FieldName} specifies the name of the field to be accessed.
1731As an operator, the dot in a \textit{MemberAccess} has the highest precedence, higher than the unary operators.
1732
1733Again, in \cogent the use of member access is restricted. The type inferred for the leading \textit{BasExpr} in a \textit{MemberAccess}
1734must be either an unboxed record type or a readonly boxed record type. Then it is possible to use the value of only one field
1735without caring about the other fields. Moreover, also the type of the accessed field must be non-linear, since
1736in addition to being accessed, its value also remains in the record, hence it could be used twice.
1737
1738The type inferred from the \textit{MemberAccess} expression structure is the type of the field named by the \textit{FieldName}.
1739
1740If types \code{A} and \code{R} are defined as in Section~\ref{expr-put} and \code{r} is bound to a value of type \code{R!}
1741then the basic expression \code{r.fld2} is a valid \textit{MemberAccess}. The basic expression
1742\code{r.fld3} is invalid since field \code{fld3} is taken in \code{R!}, the basic expression 
1743\code{r.fld1} is valid since field \code{fld1} has type \code{A!} in \code{R!} (due to recursive application of the bang operator).
1744If \code{r} is bound to a value
1745of type \code{R} then also the basic expression \code{r.fld2} is invalid since type \code{R} is linear.
1746
1747\subsection{General Expressions}
1748
1749In \cogent the most general concept for specifying a calculation as an expression is \textit{matching}. All other
1750forms of general expressions can be understood as specific variants of matching.
1751
1752\subsubsection{Matching Expressions}
1753
1754A \textit{MatchingExpression} matches a value against one (irrefutable) pattern or several (refutable) patterns.
1755For every pattern a subexpression is specified for the result:
1756
1757\vspace{2ex}
1758\indprod{Expression}{
1759  BasicExpression
1760
1761  MatchingExpression
1762  
1763  \ldots
1764}
1765
1766\indprod{MatchingExpression}{
1767  ObservableBasicExpression Alternative \{Alternative\}
1768}
1769
1770\indprod{ObservableBasicExpression}{
1771  BasicExpression
1772  
1773  \ldots
1774}
1775
1776\indprod{Alternative}{
1777  \code{|} Pattern PArr Expression
1778}
1779
1780\indselprod{PArr}{
1781  \code{->} \code{=>} \code{\~{}>}
1782}
1783
1784All \textit{Expression}s in the \textit{Alternative}s must have equal inferred types, this is also the
1785type inferred from the \textit{MatchingExpression}'s structure.
1786
1787For every \textit{Alternative} the \textit{Expression} is called the \textit{scope} of the variables occurring in 
1788the \textit{Pattern}.
1789
1790All \textit{Pattern}s in the \textit{Alternative}s must conform to the type T inferred for the leading expression.
1791The \textit{Pattern}s together must be exhaustive for T, that means, every value of type T must match one of them. This
1792may be accomplished by using an exhaustive set of refutable patterns, such as one for every alternative in a variant type,
1793or by optionally specifying some refutable patterns followed by a final alternative with an irrefutable pattern.
1794
1795
1796The order in which alternatives are specified is irrelevant. The pattern syntax in \cogent 
1797guarantees that different refutable patterns cannot partially overlap, i.e. the sets of matching values
1798are disjunct or equal. Moreover, a refutable pattern may be specified in at most one alternative. Together,
1799every value matches at most one of the refutable patterns, there is no need to resolve conflicts.
1800An irrefutable pattern is only used when no refutable pattern matches.
1801
1802
1803If the variable \code{x} is bound to a value of type \code{U8} an example for a \textit{MatchingExpression} is
1804\begin{verbatim}
1805  x + 7 | 20 -> "too much"
1806        | 10 -> "too few"
1807        | _  -> "unknown"
1808\end{verbatim}
1809It has the inferred type \code{String}.
1810
1811If the variable \code{v} is bound to a value of the variant type
1812
1813\begin{verbatim}
1814  < TwoDim U32 U32 | ThreeDim U32 U32 U32 | Error U8 >
1815\end{verbatim}
1816
1817then the following is a valid \textit{MatchingExpression} with inferred type \code{U32}:
1818
1819\begin{verbatim}
1820  v | TwoDim x y -> x+y
1821    | ThreeDim x y z -> x+y+z
1822    | Error code -> 0
1823\end{verbatim}
1824
1825whereas
1826
1827\begin{verbatim}
1828  v | TwoDim x y -> x+y
1829    | ThreeDim x y z -> x+y+z
1830\end{verbatim}
1831
1832is invalid since it is not exhaustive for the type of \code{v}.
1833
1834\todo{Using layout for Alternative grouping}
1835
1836
1837Alternatively to the separator \code{->} the separators \code{=>} and \code{\~{}>} can be used in an \textit{Alternative}.
1838Semantically they have the same meaning, however they may allow for some code optimization when the first is used for 
1839``likely'' alternatives and the second for ``unlikely'' alternatives.
1840
1841
1842\subsubsection{Binding Variables}
1843\label{expr-let}
1844
1845If the only intention for using a \textit{MatchingExpression} is binding variables, the simpler \textit{LetExpression}
1846syntax can be used:
1847
1848\vspace{2ex}
1849\indprod{Expression}{
1850  BasicExpression
1851
1852  MatchingExpression
1853  
1854  LetExpression
1855  
1856  \ldots
1857}
1858
1859\indprod{LetExpression}{
1860  \code{let} Binding \{\code{and} Binding\} \code{in} Expression
1861}
1862
1863\indprod{Binding}{
1864  IrrefutablePattern [\code{:} MonoType] = ObservableExpression
1865}
1866
1867\indprod{ObservableExpression}{
1868  Expression
1869  
1870  \ldots
1871}
1872
1873A simple \textit{LetExpression} is equivalent to a \textit{MatchingExpression} with one \textit{Alternative}:\\
1874\hspace*{1cm} \code{let} IP \code{=} E \code{in} F\\
1875is semantically equivalent with\\
1876\hspace*{1cm} E \code{|} IP \code{->} F
1877
1878From this it follows that pattern IP must conform to the type inferred for E and the type inferred
1879from the \textit{LetExpression}'s structure is that inferred for F. The expression F is also called the ``body'' of the
1880\textit{LetExpression}, it is the scope of the variables in IP.
1881
1882The \textit{LetExpression}
1883\begin{verbatim}
1884  let x = y + 5 in (True, x)
1885\end{verbatim}
1886binds the variable \code{x} to the result of evaluating the expression \code{y + 5} and evaluates to a tuple
1887where the bound value is used as the second field value. The tuple expression is the scope of variable \code{x}.
1888
1889If types \code{A} and \code{R} are defined as in Section~\ref{expr-put} and \code{r} is bound to a value of type \code{R}
1890then the \textit{LetExpression}
1891\begin{verbatim}
1892  let s {fld1 = x, fld2} = r in (x, fld2 + 5, s)
1893\end{verbatim}
1894binds the variables \code{s}, \code{x}, and \code{fld2} by matching the pattern against the value bound to \code{r}
1895as described in Section~\ref{pat-rec}. Then it uses them in their scope which is a tuple term. 
1896The type inferred for the \textit{LetExpression} is
1897\begin{verbatim}
1898  (A, U32, R take (fld1, fld2))
1899\end{verbatim}
1900
1901In a \textit{Binding} optionally a \textit{MonoType} may be specified:\\
1902\hspace*{1cm} IP \code{:} T \code{=} E\\
1903If neither for E nor the pattern IP a type can be inferred the type specification is mandatory.
1904
1905 If E is an \textit{IntegerLiteral} of type U  and T is a bitstring type which is a superset of U then 
1906the value of E is automatically widened to type T before matching it against IP. Therefore the \textit{LetExpression}
1907\begin{verbatim}
1908  let x: U32 = 5 in (True, x)
1909\end{verbatim}
1910has inferred type \code{(Bool, U32)}, although the literal \code{5} has type \code{U8}.
1911
1912A \textit{LetExpression} of the form\\
1913  \hspace*{1cm} \code{let} B1 \code{and} B2 \code{in} F\\
1914is simply an abbreviation for the nested \textit{LetExpression}\\
1915  \hspace*{1cm} \code{let} B1 \code{in} \code{let} B2 \code{in} F
1916
1917A \textit{LetExpression} which uses the wildcard pattern\\
1918  \hspace*{1cm} \code{let} \code{\_} \code{=} E \code{in} F\\
1919can be abbreviated to\\
1920  \hspace*{1cm} E \code{;} F\\
1921using the following syntax:
1922
1923\vspace{2ex}
1924\indprod{BasicExpression}{
1925  BasExpr
1926  
1927  BasExpr \code{;} Expression
1928}
1929
1930Since a \textit{LetExpression} is only used to bind variables occurring in the pattern and there
1931is no variable in the wildcard pattern this case seems to be useless. Its only use is when
1932expression E has side effects. Note that functions which are completely defined in \cogent do
1933not have side effects, however, functions defined externally can have side effects.
1934
1935An example usage whould be an externally defined function of type \code{String -> ()} which is
1936bound to the variable \code{print} and prints its \textit{String} argument to a display. Then 
1937the expression 
1938
1939\begin{verbatim}
1940  v | TwoDim x y -> print "flat"; x+y
1941    | ThreeDim x y z -> print "space"; x+y+z
1942    | Error code -> print "crash"; 0
1943\end{verbatim}
1944
1945would print one of the strings to the display whenever it is evaluated.
1946
1947\subsubsection{Conditional Expressions}
1948
1949If the only intention for using a \textit{MatchingExpression} is discrimination between two cases
1950the \textit{ConditionalExpression} can be used which is nearly omnipresent in programming languages.
1951It has the usual syntax:
1952
1953\vspace{2ex}
1954\indprod{Expression}{
1955  BasicExpression
1956
1957  MatchingExpression
1958  
1959  LetExpression
1960  
1961  ConditionalExpression
1962}
1963
1964\indprod{ConditionalExpression}{
1965  \code{if} ObservableExpression \code{then} Expression \code{else} Expression
1966}
1967
1968The \textit{ConditionalExpression}\\
1969  \hspace*{1cm} \code{if} C \code{then} E \code{else} F\\
1970is equivalent to the \textit{MatchingExpression} 
1971\begin{tabbing}
1972  \hspace*{1cm} C \= \code{|} \code{True} \code{->} E \\
1973                  \> \code{|} \code{False} \code{->} F
1974\end{tabbing} 
1975 From this it follows that C must have the inferred type \code{Bool} and E and F must have the same inferred type 
1976which is the type inferred from the \textit{ConditionalExpression}'s structure.
1977
1978If a \textit{MatchingExpression} discriminates among more than two cases, as usual
1979a nested \textit{ConditionalExpression} can be used instead. 
1980
1981\todo{Using layout to disambiguate nested ConditionalExpressions}
1982
1983An example for a \textit{ConditionalExpression} is
1984\begin{verbatim}
1985  if x > 5 then (True, "sufficient") else (False, "insufficient)
1986\end{verbatim}
1987It has the inferred type \code{(Bool, String)}.
1988
1989\subsubsection{Observing Variables}
1990
1991At some places variables can be ``observed'' in an expression. Observing a variable means replacing its bound
1992value with a copy of readonly type. Observing variables is the only way how values of readonly types can be
1993produced in \cogent.
1994
1995When a variable should be observed, an expression must be specified as scope of the observation. The readonly
1996value may be freely used in this scope, but it may not escape from it. Syntactically, an expression which may 
1997be the scope of a variable observation is called an \textit{observable expression}.
1998The syntax for variable observation is as follows:
1999
2000\vspace{2ex}
2001\indprod{ObservableBasicExpression}{
2002  BasicExpression
2003  
2004  BasicExpression \{\code{!} Variable\}
2005}
2006
2007\indprod{ObservableExpression}{
2008  Expression
2009  
2010  Expression \{\code{!} Variable\}
2011}
2012
2013In both cases one or more observed variables are specified at the \textit{end} of the observation scope
2014using the ``bang'' operator as a prefix. Examples for \textit{ObservableExpression}s are
2015\begin{verbatim}
2016  if isok #{fld1=x, fld2=x, fld3=z} then 5 else 0 !x !y
2017  let v1 = x and v2 = x and v3 = z in (1, 2, 3) !x !z
2018\end{verbatim}
2019
2020If there is at least one banged variable in an observable expression, then the inferred type of the scope
2021may not be an escape-restricted type. 
2022
2023The \textit{ObservableExpression}\\
2024\hspace*{1cm} E \code{!}V\\
2025is conceptually equivalent to a \textit{LetExpression} of the form\\
2026\hspace*{1cm} \code{let} V \code{=} \code{readonly} V \code{in} E\\
2027where \code{readonly} would be an operator which produces a readonly copy from a value. An important effect of
2028this form is that the variable used for the readonly copy has the same name as the variable containing the original
2029value. Therefore the former variable shadows the latter in its scope, making the original value unaccessible there.
2030
2031The operator \code{readonly} does not actually exist in \cogent, hence expressions of the second form cannot be used
2032to bind readonly copies. This guarantees that the variable for the readonly copy \textit{always} shadows the
2033original value in its scope.
2034
2035Observable expressions may only occur in three places: As the leading expression in a \textit{MatchingExpression} and 
2036in the corresponding position in the more specific forms, which is the right-hand side of a \textit{Binding} in a 
2037\textit{LetExpression} and the condition in a \textit{ConditionalExpression}. 
2038
2039\section{Expression Usage Rules}
2040\label{expr-usage}
2041
2042\cogent's linear type system implies additional restrictions on expression usage over the usual restriction that
2043the type of a function argument must be compatible to the parameter type. The additional rules are described in 
2044this section.
2045
2046\subsection{Using Values of Linear Types}
2047
2048The basic rule for linear types is that their values must be used exactly once. For observing this rule it must 
2049be specified in more detail, what it means to use a value.
2050
2051\subsubsection{Sharing a Value}
2052
2053In a \cogent program, values are always denoted by expressions. If an expression is a \textit{Term} for a tuple, a record,
2054or a variant type, or if it is a \textit{BasExpr} representing the application of a function or operator, or if it is
2055a \textit{MatchingExpression} or one of its specific variants, the value is created by evaluating the expression. Then
2056it can only be used atmost once: at the position where the expression syntactically occurs in the program. In the remaining
2057cases the expression is either a single variable or a \textit{MemberAccess} (values of literals are never linear). 
2058A value 
2059bound to a variable can be used more than once: it is used at all places where it is referenced by
2060the variable name in its scope. The value of a record field can be used more than once by accessing the field
2061several times. In both cases we say the value may be ``shared''.
2062
2063When a record field is accessed its value is not taken from the record, hence it is already shared between the record
2064and the access result upon a single member access. As a consequence, record fields of linear type may not be accessed
2065using a \textit{MemberAccess} expression.
2066
2067Hence the rule for using values of linear types not more than once is only relevant for variables: 
2068if a variable has a 
2069bound value of a linear type, the value must be used atmost once by referencing it, it may not be shared. However, 
2070as can be seen for the variable \code{v} in the example
2071\begin{verbatim}
2072  if x == 5 then f v else g v
2073\end{verbatim}
2074the number of uses of the value is not simply the number of occurrences of the variable name in its scope.  Instead,
2075the rule is that a variable of linear type must occur atmost once in all possible paths of an execution. Thus,
2076for a \textit{ConditionalExpression} it must either occur once in the condition, or in each branch. For
2077a \textit{MatchingExpression} it must either occur once in the leading 
2078\textit{ObservableBasicExpression}, or in each \textit{Alternative}.
2079
2080Note that the field names in a \textit{RecordTerm}, a \textit{PutExpression}, a \textit{RecordPattern} or a
2081\textit{MemberAccess} are irrelevant, even if a field is present with the same name as the variable. Moreover,
2082only free occurrences count. If a variable of the same name is bound in the scope, the binding and its usages
2083are irrelevant for the original variable. Variables are bound by \textit{LetExpression}s, 
2084\textit{ObservableExpression}s, \textit{ObservableBasicExpression}s, and \textit{LambdaTerm}s.
2085
2086
2087\subsubsection{Discarding a Value}
2088
2089
2090If a variable is never used in its scope its value is ``discarded''. Values of linear type
2091may not be discarded. This is guaranteed for values bound to a variable, if it is used in every possible path of 
2092execution. 
2093
2094
2095Although the value of an expression other than a variable or member access cannot be used more than once, it can be discarded
2096by matching the expression with a pattern other than a variable or a boxed record pattern. In the case of the wildcard pattern 
2097as in
2098\begin{verbatim}
2099  let _ = someExpression
2100\end{verbatim}
2101the expression \code{someExpression} may have a linear type, then this matching would be illegal. In the case of a 
2102\textit{LiteralPattern} the expression must always have a primitive type which is never linear. The same holds for an expression
2103which occurs as condition in a \textit{ConditionalExpression}. 
2104
2105In the case of a 
2106\textit{TuplePattern}, a \textit{VariantPattern} or an unboxed \textit{RecordPattern} the expression only has a linear
2107type if it has components of a linear type. Then it is no problem to discard the value as long as no component of a 
2108linear type is discarded, as in 
2109
2110\begin{verbatim}
2111  let (a, #{fld1= _, fld2=b}, c) = someExpression
2112\end{verbatim}
2113
2114In this case the \code{fld1} of the second field of the value is discarded which would be illegal if it has linear type.
2115
2116
2117A record field is also discarded if it is replaced in a \textit{PutExpression}. Therefore in a \textit{PutExpression}
2118the leading \textit{BasExpr} must not have linear fields which are put, if there are linear fields they must have been taken.
2119
2120The value of an expression is discarded when the expression is used as the \textit{BasExpr} in a \textit{MemberAccess}.
2121
2122Together, linear values could be discarded by binding them to a variable which is never used in its scope, by matching them 
2123with the wildcard pattern, by replacing them in a \textit{PutExpression}, or by using them as the record in a \textit{MemberAccess}.
2124All these cases are not allowed for values of linear type in \cogent.
2125
2126However, there are two other cases which specifically apply to values of a boxed record type. If such an expression is used 
2127as the leading expression in a \textit{PutExpression} or if it is matched against a \textit{RecordPattern}, it is discarded
2128as well. These two cases are allowed in \cogent. Note that in both cases a new value of the same type is created, in the
2129first case it becomes the result of the \textit{PutExpression}, in the second case it is bound to the leading variable of the
2130\textit{RecordPattern}.
2131
2132\subsubsection{The Result of Using a Value}
2133
2134What happens to a value after it has been used? ``Using'' here only means a \textit{syntactical} usage, it does not mean
2135that the value is dismissed afterwards. Depending on the context of usage there are three possibilities: the value may immediately
2136be used in the context, it may become a part of another value (its ``container'' value), or it may be bound to a variable.
2137
2138If the value results from evaluating an expression E in an \textit{Alternative}, in a branch of a \textit{ConditionalExpression}, or 
2139in the body of a \textit{LetExpression}, then the value becomes the evaluation result of the expression containing E and is immediately
2140used in the context.
2141
2142If expression E occurs as subexpression in a tuple term, a record term, or a variant term, or in a \textit{RecordAssignment} of
2143a \textit{PutExpression}, its evaluation result becomes a part of its container value created by the term or \textit{PutExpression}, 
2144respectively. Since a value of linear type may be used only once, it is always the part of atmost one container value. The container
2145value, since it has a part of linear type is also of linear type and behaves in the same way. 
2146
2147Whenever a container value is used, it is used with all its parts. A linear part can be separated from its container by matching the
2148container value with a complex pattern which binds the part to a variable and dismisses the container. If the container is a boxed
2149record, a new container will be created where the part is taken. Thus, after binding the part to a variable 
2150it is not a part of its container anymore.
2151
2152If expression E is the leading expression in a \textit{MatchingExpression}, or occurs in a \textit{Binding} of a \textit{LetExpression}, 
2153or is the argument in a \textit{FunctionApplication},
2154then it is matched against a pattern. If the pattern is a variable, the evaluation result is bound to the variable. It remains bound to
2155it until the evaluation of its scope ends. However, if the value is of linear type, it cannot be referenced by the variable after
2156its first use, hence thereafter the binding is irrelevant.
2157
2158Note that the body expression in a \textit{LambdaTerm} is not evaluated when evaluating the \textit{LambdaTerm} to yield a function.
2159The body will only be evaluated when the function is applied to an argument.
2160
2161Taking it all together, the usage rules imply that a linear value in a pure \cogent program is always either bound to exactly one variable 
2162which has not yet been used or it is a part of exactly one container value which also is linear. In a \cogent program linear values are 
2163only dismissed and created in \textit{PutExpression}s and by matching boxed \textit{RecordPatterns}. In both cases a boxed record value is 
2164dismissed and a value of the same type is created.
2165
2166These properties are exploited by \cogent in the following way. Whenever a boxed record is dismissed it is ``reused'' to create the 
2167new value. Since the new value only differs from the old value by some fields having a different value, the old value is \textit{modified}
2168by replacing these field values. As a consequence, linear values are \textit{never} created or destroyed in a \cogent program, 
2169they are only passed around as a single copy, possibly being modified on their way. Creating or destroying linear values must be accomplished 
2170externally implemented in C.
2171
2172\subsection{Using Values of Readonly Types}
2173
2174The basic rule for readonly types is that their values may not be modified. Of course, since \cogent is a functional language,
2175values are conceptually never modified. However we have seen that value modification occurs in \cogent as an optimization for
2176linear values, although semantically this modification can never be observed. 
2177
2178\subsubsection{Modifying a Value}
2179
2180The only way to modify a value in \cogent is by changing the value of a field in a boxed record. This can be achieved
2181with the help of a \textit{PutExpression} where a new value is specified for a field. It can also be achieved with the
2182help of a  take operation by matching a \textit{RecordPattern} with a boxed record value. 
2183
2184Therefore the following rules apply to values of readonly types:
2185\begin{itemize}
2186\item a value of readonly type may not be used as the leading \textit{BasExpr} in a \textit{PutExpression},
2187\item a value of readonly type may not be matched against a  record pattern.
2188\end{itemize}
2189
2190
2191When taking a field from a readonly record it is irrelevant whether the field has linear type or not. In both cases
2192the record would be modified which is not allowed. If the field has non-linear type, the taken value could
2193remain in the record. However, \cogent implements taking fields always by removing the field value from the record, 
2194thus modifying the record.
2195
2196
2197\subsubsection{Creating readonly Values}
2198
2199The only way to create a value of readonly type is to apply the bang operator to a variable in an
2200\textit{ObservableExpression} or \textit{ObservableBasicExpression}. This creates a readonly copy of the bound value
2201and binds it to the same variable, using the subexpression  before the first banged variable  as scope for this binding. We call this subexpression a
2202banged scope. If the previosly bound value had the linear type T, the readonly copy has type T! which is readonly or contains
2203readonly parts.
2204
2205Note that the original binding is shadowed in the banged scope, hence the linear value cannot be referenced there, 
2206in particular, it cannot be modified. This is exploited by \cogent in the following way. The original value is 
2207actually not copied at all, it remains bound to its variable. Only its type as seen through the variable is changed to T!
2208in the banged scope. 
2209
2210In the banged scope the readonly copies can be freely duplicated, bound to any number of variables and inserted
2211as parts in any number of container values.
2212
2213\subsubsection{Preventing Values from Escaping}
2214
2215When execution leaves the banged context the shadowing ends and original value of linear type may be accessed again
2216and may be modified. Although all copies are still of readonly type, they would be modified as well, since actually
2217they have not been copied. This problem is solved by \cogent by preventing the copies to ``escape'' from the banged 
2218scope. Then they cannot be referenced and observed outside the scope and modifications to the original value
2219are no problem.
2220
2221If a readonly copy is bound to a variable, the scope of this binding must be syntactically enclosed in the banged
2222scope and cannot be referenced outside. The only way a value can escape from the banged scope is if it is the result
2223value the banged scope evaluates to or a part of it. This must be prevented by \cogent.
2224
2225It seems that to achieve this \cogent has to ``track'' all readonly copies and prevent them to become a part of 
2226the result value. However, it is impossible to do this statically, since a copy can be passed to an externally
2227defined function which may return it as part of its result without \cogent knowing this. Therefore a simpler
2228but much more radical approach is used, by preventing \textit{all} values with an escape-restricted type from
2229escaping from \textit{any} banged scope, irrespective whether it is related to the value or type of the 
2230banged variable. This safely also prevents the readonly copies from escaping. 
2231
2232This approach can be implemented with the help of type checking. The rule to apply is that the type inferred 
2233for a banged scope in an \textit{ObservableExpression} or \textit{ObservableBasicExpression} must not be
2234escape-restricted.
2235
2236This rule implies that even readonly values which existed outside of the banged scope cannot be used as part
2237of its result. Normally this is not a problem since they are available outside the banged scope anyways.
2238However, if the value's type is both escape-restricted and linear, the situation is different. Due to
2239the linearity, the value must not be discarded in the banged scope, it must leave it, which is not allowed
2240either. The solution here is to separate all escape-restricted parts from the rest, discard them in the 
2241banged scope and let the rest escape.
2242
2243\chapter{Programs}
2244
2245A \cogent program is a sequence of toplevel definitions and include statements.
2246There is no main program, it must always be implemented externally in C.
2247
2248\vspace{2ex}
2249\indprod{Program}{
2250  TopLevelUnit \{TopLevelUnit\}
2251}
2252
2253\section{Including Files}
2254
2255For modularization purpose a \cogent program may be distributed among several files
2256using include statements. Like in many other programming languages, an include statement 
2257is replaced by the content of the included file.
2258
2259The syntax for an include statement is:
2260
2261\vspace{2ex}
2262\indprod{TopLevelUnit}{
2263  Include
2264  
2265  \ldots
2266}
2267
2268\indprod{Include}{
2269  \code{include} StringLiteral
2270  
2271  \code{include} SystemFile
2272}
2273
2274\indinfprod{Systemfile}{
2275  A file pathname enclosed in \code{<} and \code{>}.
2276}
2277
2278The \textit{StringLiteral} specifies the pathname of the file to be included, either as 
2279an absolute path or as a path relative to the directory where the file containing the include statement
2280resides.  A \textit{SystemFile}, like in C, specifies a file which is searched at standard places by the
2281\cogent compiler.
2282
2283Include statements are transitive, if an included file contains include statements they are executed as well.
2284
2285However, every file is included only once. If several include statements specify the same file, it is
2286only include by the first statement seen when processing the \cogent source file, all other inclusions
2287of the file are ignored. This is also true for transitive includes, in particular, circular includes do no harm.
2288The effect is the same that is usually achieved in C by \#DEFINEing a flag in an include file and including 
2289the file body only if the flag is not yet set.
2290
2291\section{Toplevel Definitions}
2292\label{toplevel-def}
2293
2294The only syntactical constructs which may occur as toplevel units in a \cogent source program are \textit{definitions}.
2295
2296\vspace{2ex}
2297\indprod{TopLevelUnit}{
2298  Include
2299  
2300  Definition
2301}
2302
2303\indprod{Definition}{
2304  TypeDefinition
2305
2306  \ldots
2307}
2308
2309A definition may be a type definition, as described in Section~\ref{def-type}.
2310
2311\subsection{Value Definitions}
2312\label{value-def}
2313
2314A definition may also be a \textit{value definition}. It has the following syntax:
2315
2316\vspace{2ex}
2317\indprod{Definition}{
2318  TypeDefinition
2319
2320  ValueDefinition
2321  
2322  \ldots
2323}
2324
2325\indprod{ValueDefinition}{
2326  Signature Variable \code{=} Expression
2327}
2328
2329\indprod{Signature}{
2330  Variable \code{:} PolyType
2331}
2332
2333\indprod{PolyType}{
2334  MonoType
2335  
2336  \ldots
2337}
2338
2339A value definition is conceptually mainly a syntactical variant for a \textit{LetExpression} which binds a single variable.
2340However, there are the following differences:
2341\begin{itemize}
2342\item the variable bound by a definition is a \textit{global} variable which can be referenced in 
2343\textit{LambdaExpression}s (see Section~\ref{term-lambda}),
2344\item the scope of the variable consists of the whole \cogent program after \textit{and before} the definition,
2345\item the type specification is mandatory and  in the case of a function type,  instead of a 
2346\textit{MonoType} it may be a more general \textit{PolyType} (see Section~\ref{def-poly}).
2347\end{itemize}
2348
2349A \textit{ValueDefinition} of the form\\
2350\hspace*{1cm} V \code{:} T V \code{=} E\\
2351is conceptually equivalent with\\
2352\hspace*{1cm} \code{let} V \code{:} T = E \code{in} F\\
2353where F is the whole \cogent program around the definition. This equivalence is only conceptual, syntactically 
2354it is not correct, since F is not an \textit{Expression} and cannot be expressed as one.
2355
2356Note that the variable V has to be specified twice. It is an error if two different variables are used in 
2357a value definition.
2358
2359Like type definitions, value definitions in \cogent are restricted to be not recursive: the variable V may not
2360occur freely in the expression E and there may be no cyclic references between different value definitions.
2361
2362The \textit{Expression} E may only contain free occurrences of global variables which have been bound in 
2363other value definitions. The \textit{Expression} E and in the \textit{PolyType} T may contain all
2364type synonyms which are defined in a type definition before or after the value definition.
2365
2366An example for a value definition is
2367\begin{verbatim}
2368  maxSize: U16
2369  maxSize = 42
2370\end{verbatim}
2371
2372\todo{layout rules for value definitions}
2373
2374\subsection{Function Definitions}
2375\label{fun-def}
2376
2377A function definition is a special case of a value definition, where the value has a function type.
2378This could be achieved with a normal value definition using a lambda expression to specify the
2379value to be bound. However, for function definitions additional syntactical forms are supported in \cogent:
2380
2381\vspace{2ex}
2382\indprod{Definition}{
2383  TypeDefinition
2384
2385  ValueDefinition
2386  
2387  FunctionDefinition
2388  
2389  \ldots
2390}
2391
2392\indprod{FunctionDefinition}{
2393  Signature Variable IrrefutablePattern \code{=} Expression
2394  
2395  Signature Variable Alternative \{Alternative\}
2396}
2397
2398A \textit{FunctionDefinition} of the form 
2399\begin{tabbing}
2400\hspace*{1cm} \= V \code{:} T \\
2401              \> V IP \code{=} E
2402\end{tabbing}
2403is semantically equivalent with
2404\begin{tabbing}
2405\hspace*{1cm} \= V \code{:} T \\
2406              \> V \verb|= \| IP \code{=>} E
2407\end{tabbing}
2408
2409In a function definition the type T must of course be a function type.
2410
2411An example for this kind of function definition is
2412\begin{verbatim}
2413  f: (U32, U32) -> #{sum: U32, dif: U32}
2414  f v = let (x,y) = v in #{sum=x+y, dif=x-y}
2415\end{verbatim}
2416where the variable \code{v} is used to reference the function argument. Note that by using a pattern
2417instead of a single variable, it is possible to directly access the argument components according to the
2418argument type:
2419\begin{verbatim}
2420  f: (U32, U32) -> #{sum: U32, dif: U32}
2421  f (x,y) = #{sum=x+y, dif=x-y}
2422\end{verbatim}
2423
2424The second form of a function definition is intended for the case that the argument is not matched against
2425a single irrefutable pattern but instead against several exhaustive refutable patterns.
2426Then the \textit{FunctionDefinition} of the form 
2427\begin{tabbing}
2428\hspace*{1cm} \= V \code{:} T\\
2429              \> V A1 \ldots An
2430\end{tabbing}
2431is semantically equivalent with
2432\begin{tabbing}
2433\hspace*{1cm} \= V \code{:} T\\
2434              \> V \code{arg = arg} A1 \ldots An
2435\end{tabbing}
2436where \code{arg} is a new variable not occurring elsewhere.
2437
2438Examples are the function definitions
2439
2440\begin{verbatim}
2441  f: <TwoDim U32 U32 | ThreeDim U32 U32 U32 | Error U8> -> (U32, U32)
2442  f | TwoDim x y -> (y,x) 
2443    | ThreeDim x y z -> (y,z) 
2444    | Error _ -> (0,0)
2445
2446  g: U8 -> U8
2447  g | 0 -> 'a' 
2448    | 1 -> 'b' 
2449    | 2 -> 'c' 
2450    | _ -> 'd'
2451\end{verbatim}
2452
2453
2454\todo{layout rules}
2455
2456\subsection{Abstract Definitions}
2457
2458An \textit{abstract} definition only specifies the type of a value bound to a variable but not the value itself.
2459Abstract definitions are only allowed if the bound value has a function type. 
2460The syntax is a normal value definition reduced to its signature:
2461
2462\vspace{2ex}
2463\indprod{Definition}{
2464  TypeDefinition
2465
2466  ValueDefinition
2467  
2468  FunctionDefinition
2469  
2470  AbstractDefinition
2471  
2472  \ldots
2473}
2474
2475\indprod{AbstractDefinition}{
2476  Signature
2477}
2478
2479The purpose of abstract definitions is to define functions which are implemented externally as C functions.
2480
2481A collection of abstract definitions together with corresponding type definitions is often called an ``abstract data type'' 
2482(``ADT''). Typically an abstract data type consists of one or more abstract type definitions and abstract definitions for
2483functions working with values of these types, where both types and functions are externally defined in C.
2484
2485\subsection{Polymorphic Definitions}
2486\label{def-poly}
2487
2488 Function  values bound by toplevel definitions may be \textit{polymorphic} which means that their
2489type is not specified uniquely.
2490This is achieved by allowing free type variables in the value's type as specified in the definition. A type expression which 
2491may contain free type variables is called a \textit{PolyType} in \cogent. Syntactically \textit{PolyType}s must be closed
2492by binding the free type variables by an ``all-quantification''. The syntax is as follows:
2493
2494\vspace{2ex}
2495\indprod{PolyType}{
2496  MonoType
2497  
2498   \code{all} PermSignatures \code{.} MonoType 
2499}
2500
2501
2502\indprod{PermSignatures}{
2503  PermSignature
2504  
2505  \code{(} PermSignature \{\code{,} PermSignature\} \code{)}
2506}
2507
2508\indprod{PermSignature}{
2509  TypeVariable
2510
2511  \ldots
2512}
2513
2514Here all type variables which occur free in the \textit{MonoType} must be listed in the \textit{PermSignatures}.
2515An example for a polymorphic value definition is
2516
2517\begin{verbatim}
2518  f: all (t, u). (t, u) -> (U32, u, U16, t)
2519  f (x,y) = (200, y, 100, x)
2520\end{verbatim}
2521
2522Since the types  \code{t} and \code{u}  are unknown, no expressions can be specified for their values other than 
2523variables to which the values have been bound. As a consequence, polymorphic values are  always  polymorphic functions
2524which take the values of the unknown types as (part of) their argument and only pass them around, perhaps placing them
2525in the function result.
2526
2527A typical example for a  polymorphic function  works with lists of arbitrary elements.
2528Therefore no specific type shall be specified for the list elements, which is achieved by using a free type variable
2529for it. The corresponding list type can be defined as a generic abstract type:
2530\begin{verbatim}
2531  type List e
2532\end{verbatim}
2533Then the usual functions working on lists can be defined by the following abstract polymorphic function definitions:
2534
2535\begin{verbatim}
2536  first: all e. List e -> Option e
2537  rest: all e. List e -> List e
2538  cons: all e. (e, List e) -> List e
2539\end{verbatim}
2540
2541Together these definitions constitute an abstract data type for lists. Note, that neither the list type nor the list 
2542functions can be defined in \cogent since they would require recursion. 
2543
2544Even when a value of an unknown type is only carried around, additional information about the type is needed for doing
2545this correctly: If the type is linear, the value may still be used only once, whereas the value may be freely copied, if
2546the type is non-linear. Therefore it is possible to specify  ``permissions'' for a type variable in the
2547\textit{PermSignatures}  using the following syntax:
2548
2549\vspace{2ex}
2550\indprod{PermSignature}{
2551  TypeVariable
2552
2553  TypeVariable :< Permissions
2554}
2555
2556\indprod{Permissions}{
2557  Permission \{Permission\}
2558}
2559
2560\indselprod{Permission}{
2561  \code{D S E}
2562}
2563
2564The permissions associated with a type variable specify what must be possible for values of that type. Permission \code{D} means
2565the values can be \textit{discarded}, permission \code{S} means the values can be \textit{shared}, and permission \code{E}
2566means that values may \textit{escape} from a banged context. If a type variable has kind \code{DSE} the actual type must be regular.
2567If a type variable has kind \code{DS} the actual type must not be linear, it may be regular or escape-restricted. If it has kind 
2568\code{E} the actual type must not be escape-restricted, it may be regular or linear. 
2569
2570If no  \textit{Permissions} are specified for a type variable the default permissions \code{E} apply.
2571
2572In the example
2573
2574\begin{verbatim}
2575  f: all (t, u :< DSE) . (t, u) -> (U32, u, U16, t, u)
2576  f (x,y) = (200, y, 100, x, y)
2577\end{verbatim}
2578
2579the type \code{t} has default  permissions  \code{E} and is thus required to be escapable. 
2580Type \code{u} is required to be regular and it is correct to use parameter \code{y} more than once in the body expression.
2581
2582Whenever a global variable bound by a polymorphic value definition is referenced, actual types must be substituted for 
2583the free type variables. These types  can  be explicitly specified using the following syntax:
2584
2585\vspace{2ex}
2586\indprod{Term}{
2587  \code{(} Expression \code{)}
2588  
2589  Variable
2590
2591  LiteralTerm
2592  
2593  TupleTerm
2594  
2595  RecordTerm
2596
2597  VariantTerm
2598  
2599  LambdaTerm
2600  
2601  PolyVariable
2602}
2603
2604\indprod{PolyVariable}{
2605   Variable \code{[} OptMonoType \{\code{,} OptMonoType\} \code{]}
2606}
2607
2608\indprod{OptMonoType}{
2609  MonoType
2610  
2611  \code{\_}
2612}
2613
2614If the types are not specified or if some types are specified by \code{\_}, the compiler tries to infer them.
2615If the compiler is unable to infer the types, then they must be explicitly specified. For example,
2616if the compiler has difficulty with the last type argument, instead of
2617\code{f \[U8, Char, <A U8\|B U16>\]}, we can write \code{f \[\_, \_, <A U8 \| B U16>\]}.
2618
2619
2620If \code{f} has been bound by the polymorphic definition above, example references are
2621\begin{verbatim}
2622  f[{fld1: U8, fld2: U8},U32]
2623  f[U16,{fld1: U8, fld2: U8}]
2624\end{verbatim}
2625where the second reference is illegal since the second type variable \code{t} is substituted by type 
2626\code{\{fld1: U8, fld2: U8\}}  which
2627is not regular.
2628
2629\chapter{Grammar}
2630
2631Here we use a grammar notation which is similar to that used in the Java language specifications.
2632The meta constructs have the following meaning:
2633\begin{itemize}
2634\item The italic brackets \textit{[]} make their content optional.
2635\item The italic braces \textit{\{\}} make their content repeatable (and optional).
2636\end{itemize}
2637Nonterminals are denoted in \textit{italics}, literal code is denoted in \code{typewriter} font.
2638
2639Productions are structured by indenting the right-hand side, every single line is one alternative.
2640There are special forms of productions for selecting among a set of terminals and for specifying
2641the syntax od a nonterminal informally.
2642
2643\vspace{2ex}
2644\noindent\gramprod{Program}{
2645  TopLevelUnit \{TopLevelUnit\}
2646}
2647\gramprod{TopLevelUnit}{
2648  Include
2649  
2650  Definition
2651}
2652\gramprod{Include}{
2653  \code{include} StringLiteral
2654  
2655  \code{include} SystemFile
2656}
2657\graminfprod{Systemfile}{
2658  A file pathname enclosed in \code{<} and \code{>}.
2659}
2660\gramprod{Definition}{
2661  TypeDefinition
2662
2663  ValueDefinition
2664  
2665  FunctionDefinition
2666  
2667  AbstractDefinition
2668}
2669\gramprod{TypeDefinition}{
2670  \code{type} TypeConstructor \{TypeVariable\} \code{=} MonoType
2671
2672  AbstractTypeDefinition
2673}
2674\gramprod{TypeConstructor}{
2675  CapitalizedId
2676}
2677\gramprod{TypeVariable}{
2678  LowercaseId
2679}
2680\gramprod{AbstractTypeDefinition}{
2681  \code{type} TypeConstructor \{TypeVariable\}
2682}
2683\gramprod{MonoType}{
2684  TypeA1
2685  
2686  FunctionType
2687}
2688\gramprod{FunctionType}{
2689   TypeA1 \code{->} TypeA1 
2690}
2691\gramprod{TypeA1}{
2692  TypeA2
2693  
2694  ParameterizedType
2695  
2696  PartialRecordType
2697}
2698\gramprod{ParameterizedType}{
2699  TypeConstructor \{TypeA2\}
2700}
2701\gramprod{PartialRecordType}{
2702  TypeA2 TakePut TakePutFields
2703}
2704\gramselprod{TakePut}{
2705  \code{take} \code{put}
2706}
2707\gramprod{TakePutFields}{
2708  FieldName
2709
2710  \code{(} [FieldName \{\code{,} FieldName\}] \code{)}
2711  
2712  \code{( .. )}
2713}
2714\gramprod{FieldName}{
2715  LowercaseId
2716}
2717\gramprod{TypeA2}{
2718  AtomType
2719
2720  \code{\#} AtomType
2721  
2722  AtomType \code{!}
2723}
2724\gramprod{AtomType}{
2725  \code{(} MonoType \code{)}
2726  
2727  TypeConstructor
2728  
2729  TupleType
2730
2731  RecordType
2732  
2733  VariantType
2734
2735  TypeVariable
2736}
2737\gramprod{TupleType}{
2738  \code{()}
2739
2740  \code{(} MonoType \code{,} MonoType \{\code{,} MonoType\} \code{)}
2741}
2742\gramprod{RecordType}{
2743  \code{\{} FieldName \code{:} MonoType \{\code{,} FieldName \code{:} MonoType\} \code{\}}
2744}
2745\gramprod{VariantType}{
2746   \code{<} DataConstructor \{TypeA2\} \{\code{|} DataConstructor \{TypeA2\}\} \code{>}
2747}
2748\gramprod{DataConstructor}{
2749  CapitalizedId
2750}
2751\gramprod{ValueDefinition}{
2752  Signature Variable \code{=} Expression
2753}
2754\gramprod{Variable}{
2755  LowercaseId
2756}
2757\gramprod{Signature}{
2758  Variable \code{:} PolyType
2759}
2760\gramprod{PolyType}{
2761  MonoType
2762  
2763  \code{all} PermSignatures \code{.} MonoType
2764}
2765\gramprod{PermSignatures}{
2766  PermSignature
2767  
2768  \code{(} PermSignature \{\code{,} PermSignature\} \code{)}
2769}
2770\gramprod{PermSignature}{
2771  TypeVariable
2772
2773  TypeVariable :< Permissions
2774}
2775\gramprod{Permissions}{
2776  Permission \{Permission\}
2777}
2778\gramselprod{Permission}{
2779  \code{D S E}
2780}
2781\gramprod{FunctionDefinition}{
2782  Signature Variable IrrefutablePattern \code{=} Expression
2783  
2784  Signature Variable Alternative \{Alternative\}
2785}
2786\gramprod{AbstractDefinition}{
2787  Signature
2788}
2789\gramprod{Pattern}{
2790  \code{(} Pattern \code{)}
2791  
2792  IrrefutablePattern
2793
2794  LiteralPattern
2795  
2796  VariantPattern
2797}
2798\gramprod{LiteralPattern}{
2799  BooleanLiteral
2800  
2801  IntegerLiteral
2802  
2803  CharacterLiteral
2804}
2805\gramprod{IrrefutablePattern}{
2806  Variable 
2807  
2808  WildcardPattern
2809
2810  TuplePattern
2811  
2812  RecordPattern
2813}
2814\gramprod{WildcardPattern}{
2815  \code{\_}
2816}
2817\gramprod{TuplePattern}{
2818  \code{()}
2819  
2820  \code{(} IrrefutablePattern \code{,} IrrefutablePattern \{\code{,} IrrefutablePattern\} \code{)}
2821}
2822\gramprod{RecordPattern}{
2823  Variable \code{\{} RecordMatchings \code{\}}
2824  
2825  \code{\#} \code{\{} RecordMatchings \code{\}}
2826}
2827\gramprod{RecordMatchings}{
2828   RecordMatching \{\code{,} RecordMatching\} 
2829
2830  
2831}
2832\gramprod{RecordMatching}{
2833  FieldName [= IrrefutablePattern]
2834}  
2835\gramprod{VariantPattern}{
2836   DataConstructor \{IrrefutablePattern\}
2837}
2838\gramprod{Expression}{
2839  BasicExpression
2840
2841  MatchingExpression
2842
2843  LetExpression
2844  
2845  ConditionalExpression
2846}
2847\gramprod{BasicExpression}{
2848  BasExpr
2849  
2850  BasExpr \code{;} Expression
2851}
2852\gramprod{MatchingExpression}{
2853  ObservableBasicExpression Alternative \{Alternative\}
2854}
2855\gramprod{ObservableBasicExpression}{
2856  BasicExpression
2857  
2858  BasicExpression \{\code{!} Variable\}
2859}
2860\gramprod{Alternative}{
2861  \code{|} Pattern PArr Expression
2862}
2863\gramselprod{PArr}{
2864  \code{->} \code{=>} \code{\~{}>}
2865}
2866\gramprod{LetExpression}{
2867  \code{let} Binding \{\code{and} Binding\} \code{in} Expression
2868}
2869\gramprod{Binding}{
2870  IrrefutablePattern [\code{:} MonoType] = ObservableExpression
2871}
2872\gramprod{ObservableExpression}{
2873  Expression
2874  
2875  Expression \{\code{!} Variable\}
2876}
2877\gramprod{ConditionalExpression}{
2878  \code{if} ObservableExpression \code{then} Expression \code{else} Expression
2879}
2880\gramprod{BasExpr}{
2881  Term
2882  
2883  FunctionApplication
2884  
2885  OperatorApplication
2886  
2887  PutExpression
2888 
2889  MemberAccess
2890}
2891\gramprod{FunctionApplication}{
2892  BasExpr BasExpr
2893}
2894\gramprod{OperatorApplication}{
2895  UnaryOP BasExpr
2896  
2897  BasExpr BinaryOp BasExpr
2898}
2899\gramselprod{UnaryOp}{
2900  complement not
2901}
2902\gramselprod{BinaryOp}{
2903   \code{o * / \% + - >= > == /= < <= .\&. .\^{}. .|. >{}> <{}< \&\& || \$}
2904}
2905\gramprod{PutExpression}{
2906  BasExpr \{ Record Assignments \}
2907}
2908\gramprod{RecordAssignments}{
2909  RecordAssignment \{\code{,} RecordAssignment\}
2910}
2911\gramprod{RecordAssignment}{
2912  FieldName [\code{=} Expression]
2913}
2914\gramprod{MemberAccess}{
2915  BasExpr \code{.} FieldName
2916}
2917\gramprod{Term}{
2918  \code{(} Expression \code{)}
2919  
2920  Variable
2921
2922  LiteralTerm
2923  
2924  TupleTerm
2925  
2926  RecordTerm
2927
2928  VariantTerm
2929  
2930  LambdaTerm
2931  
2932  PolyVariable
2933}
2934\gramprod{LiteralTerm}{
2935  BooleanLiteral
2936  
2937  IntegerLiteral
2938  
2939  CharacterLiteral
2940  
2941  StringLiteral
2942}
2943\gramselprod{BooleanLiteral}{
2944  \code{True} \code{False}
2945}
2946\gramprod{IntegerLiteral}{
2947  DecDigits
2948  
2949  \code{0x} HexDigits
2950  
2951  \code{0X} HexDigits
2952
2953  \code{0o} OctDigits
2954
2955  \code{0O} OctDigits
2956}
2957\graminfprod{DecDigits}{
2958  A sequence of decimal digits 0-9.
2959}
2960\graminfprod{HexDigits}{
2961  A sequence of hexadecimal digits 0-9, A-F.
2962}
2963\graminfprod{OctDigits}{
2964  A sequence of octal digits 0-7.
2965}
2966\graminfprod{CharacterLiteral}{
2967  An ASCII character enclosed in single quotes.
2968}
2969\graminfprod{StringLiteral}{
2970  A sequence of ASCII characters enclosed in double quotes.
2971}
2972\gramprod{TupleTerm}{
2973  \code{()}
2974  
2975  \code{(} Expression \code{,} Expression \{\code{,} Expression\} \code{)}
2976}
2977\gramprod{RecordTerm}{
2978  \code{\#} \code{\{} RecordAssignments \code{\}}
2979}
2980\gramprod{VariantTerm}{
2981   DataConstructor \{Term\}
2982}
2983\gramprod{LambdaTerm}{
2984  \code{$\backslash$} IrrefutablePattern [\code{:} MonoType] \code{=>} Expression
2985}
2986\gramprod{PolyVariable}{
2987  Variable \code{[} OptMonoType \{\code{,} OptMonoType\} \code{]}
2988}
2989\gramprod{OptMonoType}{
2990  MonoType
2991  
2992  \code{\_}
2993}
2994\graminfprod{LowercaseID}{
2995  A sequence of letters, digits and underscore symbols
2996  
2997  starting with a lowercase letter
2998}
2999\graminfprod{CapitalizedID}{
3000  A sequence of letters, digits and underscore symbols
3001  
3002  starting with an uppercase letter
3003}
3004\end{document}
3005