10SN/A# $NetBSD: varmod-match-escape.mk,v 1.13 2024/04/20 10:18:55 rillig Exp $
29330SN/A#
30SN/A# As of 2020-08-01, the :M and :N modifiers interpret backslashes differently,
40SN/A# depending on whether there was an expression somewhere before the
50SN/A# first backslash or not.  See ParseModifier_Match, "copy = true".
60SN/A#
72362SN/A# Apart from the different and possibly confusing debug output, there is no
80SN/A# difference in behavior.  When parsing the modifier text, only \{, \} and \:
92362SN/A# are unescaped, and in the pattern matching these have the same meaning as
100SN/A# their plain variants '{', '}' and ':'.  In the pattern matching from
110SN/A# Str_Match, only \*, \? or \[ would make a noticeable difference.
120SN/A
# Enable debug logging for conditions (-dc) and variables (-dv), so that the
# way each pattern is parsed shows up in the test output.
130SN/A.MAKEFLAGS: -dcv
140SN/A
# Both sides filter the same word list with the pattern '\:'.  The only
# difference is whether the empty expression ${:U} comes before or after the
# backslash.  The results are expected to be equal, so the warning must not
# fire.
150SN/ASPECIALS=	\: : \\ * \*
160SN/A.if ${SPECIALS:M${:U}\:} != ${SPECIALS:M\:${:U}}
170SN/A.  warning unexpected
180SN/A.endif
190SN/A
200SN/A# And now both cases combined: A single modifier with both an escaped ':'
212362SN/A# as well as an expression that expands to a ':'.
222362SN/A#
232362SN/A# XXX: As of 2020-11-01, when an escaped ':' occurs before the
240SN/A# expression, the whole modifier text is subject to unescaping '\:' to ':',
250SN/A# before the expression is expanded.  This means that the '\:' inside
260SN/A# the expression is unescaped as well, turning ${:U\:} into a simple
270SN/A# ${:U:}, which silently expands to an empty string, instead of generating
280SN/A# an error message.
290SN/A#
300SN/A# XXX: As of 2020-11-01, the modifier on the right-hand side of the
310SN/A# comparison is parsed differently though.  First, the expression
320SN/A# is parsed, resulting in ':' and needSubst=true.  After that, the escaped
330SN/A# ':' is seen, but this time the assignment "copy = true" is not executed, so
340SN/A# copy stays false.  Therefore the escaped ':' is kept as-is, and the final
350SN/A# pattern becomes ':\:'.
360SN/A#
375466SN/A# If ParseModifier_Match had used the same parsing algorithm as Var_Subst,
380SN/A# both patterns would end up as '::'.
390SN/A#
# Left-hand side: the escaped ':' comes first, followed by the expression.
# Right-hand side: the expression comes first, followed by the escaped ':'.
# The two sides are expected to differ, hence the expected warning below.
400SN/AVALUES=		: :: :\:
410SN/A.if ${VALUES:M\:${:U\:}} != ${VALUES:M${:U\:}\:}
420SN/A# expect+1: warning: XXX: Oops
430SN/A.  warning XXX: Oops
440SN/A.endif
450SN/A
# NOTE(review): -d0 presumably switches off the debug logging that -dcv
# enabled further up -- confirm against the make sources.
460SN/A.MAKEFLAGS: -d0
470SN/A
480SN/A# XXX: As of 2020-11-01, unlike all other variable modifiers, a '$' in the
490SN/A# :M and :N modifiers is written as '$$', not as '\$'.  This is confusing,
500SN/A# undocumented and hopefully not used in practice.
# The pattern is written as '$$'; the run aborts via .error unless the
# expression evaluates to the same string as "\$".
510SN/A.if ${:U\$:M$$} != "\$"
520SN/A.  error
530SN/A.endif
540SN/A
550SN/A# XXX: As of 2020-11-01, unlike all other variable modifiers, '\$' is not
560SN/A# parsed as an escaped '$'.  Instead, ParseModifier_Match first scans for
570SN/A# the ':' at the end of the modifier, which results in the pattern '\$'.
580SN/A# No unescaping takes place since the pattern neither contained '\:' nor
590SN/A# '\{' nor '\}'.  But the text is expanded, and a lonely '$' at the end
600SN/A# is silently discarded.  The resulting expanded pattern is thus '\', that
610SN/A# is a single backslash.
# With the pattern written as '\$', no word is expected to match, so the
# expression must evaluate to the empty string; otherwise .error aborts.
620SN/A.if ${:U\$:M\$} != ""
630SN/A.  error
640SN/A.endif
650SN/A
660SN/A# In lint mode, the case of a lonely '$' is covered with an error message.
# -dL switches on lint mode.  The condition is the same as in the non-lint
# case; the expect line records the diagnostic that lint mode adds for it.
670SN/A.MAKEFLAGS: -dL
685466SN/A# expect+1: while evaluating "${:U\$:M\$} != """: Dollar followed by nothing
690SN/A.if ${:U\$:M\$} != ""
700SN/A.  error
710SN/A.endif
720SN/A
730SN/A# The control flow of the pattern parser depends on the actual string that
740SN/A# is being matched.  There needs to be either a test that shows a difference
750SN/A# in behavior, or a proof that the behavior does not depend on the actual
760SN/A# string.
770SN/A#
780SN/A# TODO: Str_Match("a-z]", "[a-z]")
790SN/A# TODO: Str_Match("012", "[0-]]")
800SN/A# TODO: Str_Match("[", "[[]")
815466SN/A# TODO: Str_Match("]", "[]")
820SN/A# TODO: Str_Match("]", "[[-]]")
830SN/A
840SN/A# Demonstrate an inconsistency between positive and negative character lists
850SN/A# when the range ends with the character ']'.
860SN/A#
870SN/A# 'A' begins the range, 'B' is in the middle of the range, ']' ends the range,
880SN/A# 'a' is outside the range.
# Word list that every pattern in the '.for pattern' loop of this file is
# matched against.
890SN/AWORDS=		A A] A]] B B] B]] ] ]] ]]] a a] a]]
# Each EXP.<pattern> variable holds the words from WORDS that are expected to
# match <pattern>.
900SN/A# The ']' is part of the character range and at the same time ends the
910SN/A# character list.
920SN/AEXP.[A-]=	A B ]
930SN/A# The first ']' is part of the character range and at the same time ends the
940SN/A# character list.
950SN/AEXP.[A-]]=	A] B] ]]
960SN/A# The first ']' is part of the character range and at the same time ends the
970SN/A# character list.
980SN/AEXP.[A-]]]=	A]] B]] ]]]
990SN/A# For negative character lists, the ']' ends the character range but does not
1000SN/A# end the character list.
1010SN/A# XXX: This is unnecessarily inconsistent but irrelevant in practice as there
1020SN/A# is no practical need for a character range that ends at ']'.
1030SN/AEXP.[^A-]=	a
1040SN/AEXP.[^A-]]=	a
1050SN/AEXP.[^A-]]]=	a]
1060SN/A
# For each pattern, filter WORDS with :M and compare the result with the
# expected list in EXP.<pattern>; a mismatch is reported as a warning that
# names the pattern and shows both lists.
# Both expect lines inside the loop refer to the '.if' line: the patterns
# '[A-]' and '[^A-]' are each expected to produce an "Unfinished character
# list" warning there.
1070SN/A.for pattern in [A-] [A-]] [A-]]] [^A-] [^A-]] [^A-]]]
1080SN/A# expect+2: while evaluating variable "WORDS": warning: Unfinished character list in pattern '[A-]' of modifier ':M'
1090SN/A# expect+1: while evaluating variable "WORDS": warning: Unfinished character list in pattern '[^A-]' of modifier ':M'
1100SN/A.  if ${WORDS:M${pattern}} != ${EXP.${pattern}}
1110SN/A.    warning ${pattern}: ${WORDS:M${pattern}} != ${EXP.${pattern}}
1120SN/A.  endif
1130SN/A.endfor
1140SN/A
1150SN/A# In brackets, the backslash is just an ordinary character.
1160SN/A# Outside brackets, it is an escape character for a few special characters.
1170SN/A# TODO: Str_Match("\\", "[\\-]]")
1180SN/A# TODO: Str_Match("-]", "[\\-]]")
1190SN/A
# Default target with a no-op recipe (':' is the shell's null command), so
# that make has a target to build once the conditions in this file have been
# evaluated.
1200SN/Aall:
1210SN/A	@:;
1220SN/A