bmake/unit-tests/varmod-match-escape.mk

0SN/A# $NetBSD: varmod-match-escape.mk,v 1.13 2024/04/20 10:18:55 rillig Exp $
9330SN/A#
0SN/A# As of 2020-08-01, the :M and :N modifiers interpret backslashes differently,
0SN/A# depending on whether there was an expression somewhere before the
0SN/A# first backslash or not.  See ParseModifier_Match, "copy = true".
0SN/A#
2362SN/A# Apart from the different and possibly confusing debug output, there is no
0SN/A# difference in behavior.  When parsing the modifier text, only \{, \} and \:
2362SN/A# are unescaped, and in the pattern matching these have the same meaning as
0SN/A# their plain variants '{', '}' and ':'.  In the pattern matching from
0SN/A# Str_Match, only \*, \? or \[ would make a noticeable difference.
0SN/A
0SN/A.MAKEFLAGS: -dcv
0SN/A# ${:U} is empty; it only places an expression before or after the backslash.
0SN/ASPECIALS=	\: : \\ * \*
0SN/A.if ${SPECIALS:M${:U}\:} != ${SPECIALS:M\:${:U}}
0SN/A.  warning unexpected
0SN/A.endif
0SN/A
0SN/A# And now both cases combined: A single modifier with both an escaped ':'
2362SN/A# as well as an expression that expands to a ':'.
2362SN/A#
2362SN/A# XXX: As of 2020-11-01, when an escaped ':' occurs before the
0SN/A# expression, the whole modifier text is subject to unescaping '\:' to ':',
0SN/A# before the expression is expanded.  This means that the '\:' in
0SN/A# the expression is unescaped as well, turning ${:U\:} into a simple
0SN/A# ${:U:}, which silently expands to an empty string, instead of generating
0SN/A# an error message.
0SN/A#
0SN/A# XXX: As of 2020-11-01, the modifier on the right-hand side of the
0SN/A# comparison is parsed differently though.  First, the expression
0SN/A# is parsed, resulting in ':' and needSubst=true.  After that, the escaped
0SN/A# ':' is seen, but this time 'copy = true' is not executed, so copy stays false.
0SN/A# Therefore the escaped ':' is kept as-is, and the final pattern becomes
0SN/A# ':\:'.
0SN/A#
5466SN/A# If ParseModifier_Match had used the same parsing algorithm as Var_Subst,
0SN/A# both patterns would end up as '::'.
0SN/A# The two sides currently differ, which is why the warning below is expected.
0SN/AVALUES=		: :: :\:
0SN/A.if ${VALUES:M\:${:U\:}} != ${VALUES:M${:U\:}\:}
0SN/A# expect+1: warning: XXX: Oops
0SN/A.  warning XXX: Oops
0SN/A.endif
0SN/A
0SN/A.MAKEFLAGS: -d0
0SN/A
0SN/A# XXX: As of 2020-11-01, unlike all other variable modifiers, a '$' in the
0SN/A# :M and :N modifiers is written as '$$', not as '\$'.  This is confusing,
0SN/A# undocumented and hopefully not used in practice.  Here, '$$' matches.
0SN/A.if ${:U\$:M$$} != "\$"
0SN/A.  error
0SN/A.endif
0SN/A
0SN/A# XXX: As of 2020-11-01, unlike all other variable modifiers, '\$' is not
0SN/A# parsed as an escaped '$'.  Instead, ParseModifier_Match first scans for
0SN/A# the ':' at the end of the modifier, which results in the pattern '\$'.
0SN/A# No unescaping takes place since the pattern neither contained '\:' nor
0SN/A# '\{' nor '\}'.  But the text is expanded, and a lonely '$' at the end
0SN/A# is silently discarded.  The resulting expanded pattern is thus '\', that
0SN/A# is a single backslash, so the value does not match and the result is empty.
0SN/A.if ${:U\$:M\$} != ""
0SN/A.  error
0SN/A.endif
0SN/A
0SN/A# In lint mode (-dL), the lonely '$' is reported with an error message instead.
0SN/A.MAKEFLAGS: -dL
5466SN/A# expect+1: while evaluating "${:U\$:M\$} != """: Dollar followed by nothing
0SN/A.if ${:U\$:M\$} != ""
0SN/A.  error
0SN/A.endif
0SN/A
0SN/A# The control flow of the pattern parser depends on the actual string that
0SN/A# is being matched.  There needs to be either a test that shows a difference
0SN/A# in behavior, or a proof that the behavior does not depend on the actual
0SN/A# string.
0SN/A#
0SN/A# TODO: Str_Match("a-z]", "[a-z]")
0SN/A# TODO: Str_Match("012", "[0-]]")
0SN/A# TODO: Str_Match("[", "[[]")
5466SN/A# TODO: Str_Match("]", "[]")
0SN/A# TODO: Str_Match("]", "[[-]]")
0SN/A
0SN/A# Demonstrate an inconsistency between positive and negative character lists
0SN/A# when the range ends with the character ']'.
0SN/A#
0SN/A# 'A' begins the range, 'B' is in the middle of the range, ']' ends the range,
0SN/A# 'a' is outside the range.
0SN/AWORDS=		A A] A]] B B] B]] ] ]] ]]] a a] a]]
0SN/A# The ']' is part of the character range and at the same time ends the
0SN/A# character list.
0SN/AEXP.[A-]=	A B ]
0SN/A# The first ']' is part of the character range and at the same time ends the
0SN/A# character list, leaving the second ']' to match a literal ']'.
0SN/AEXP.[A-]]=	A] B] ]]
0SN/A# The first ']' is part of the character range and at the same time ends the
0SN/A# character list, leaving the remaining ']]' to match a literal ']]'.
0SN/AEXP.[A-]]]=	A]] B]] ]]]
0SN/A# For negative character lists, the ']' ends the character range but does not
0SN/A# end the character list.
0SN/A# XXX: This is unnecessarily inconsistent but irrelevant in practice as there
0SN/A# is no practical need for a character range that ends at ']'.
0SN/AEXP.[^A-]=	a
0SN/AEXP.[^A-]]=	a
0SN/AEXP.[^A-]]]=	a]
0SN/A# Compare each pattern's matches with EXP.<pattern>; a mismatch only warns.
0SN/A.for pattern in [A-] [A-]] [A-]]] [^A-] [^A-]] [^A-]]]
0SN/A# expect+2: while evaluating variable "WORDS": warning: Unfinished character list in pattern '[A-]' of modifier ':M'
0SN/A# expect+1: while evaluating variable "WORDS": warning: Unfinished character list in pattern '[^A-]' of modifier ':M'
0SN/A.  if ${WORDS:M${pattern}} != ${EXP.${pattern}}
0SN/A.    warning ${pattern}: ${WORDS:M${pattern}} != ${EXP.${pattern}}
0SN/A.  endif
0SN/A.endfor
0SN/A
0SN/A# In brackets, the backslash is just an ordinary character.
0SN/A# Outside brackets, it is an escape character for a few special characters.
0SN/A# TODO: Str_Match("\\", "[\\-]]")
0SN/A# TODO: Str_Match("-]", "[\\-]]")
0SN/A# All checks above are directives; the default target itself does nothing.
0SN/Aall:
0SN/A	@:;
0SN/A