1/*
2 * eol.c :  generic eol/keyword routines
3 *
4 * ====================================================================
5 *    Licensed to the Apache Software Foundation (ASF) under one
6 *    or more contributor license agreements.  See the NOTICE file
7 *    distributed with this work for additional information
8 *    regarding copyright ownership.  The ASF licenses this file
9 *    to you under the Apache License, Version 2.0 (the
10 *    "License"); you may not use this file except in compliance
11 *    with the License.  You may obtain a copy of the License at
12 *
13 *      http://www.apache.org/licenses/LICENSE-2.0
14 *
15 *    Unless required by applicable law or agreed to in writing,
16 *    software distributed under the License is distributed on an
17 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 *    KIND, either express or implied.  See the License for the
19 *    specific language governing permissions and limitations
20 *    under the License.
21 * ====================================================================
22 */
23
24
25
26#define APR_WANT_STRFUNC
27
28#include <apr_file_io.h>
29#include "svn_io.h"
30#include "private/svn_eol_private.h"
31#include "private/svn_dep_compat.h"
32
33/* Machine-word-sized masks used in svn_eol__find_eol_start.
34 */
35char *
36svn_eol__find_eol_start(char *buf, apr_size_t len)
37{
38#if !SVN_UNALIGNED_ACCESS_IS_OK
39
40  /* On some systems, we need to make sure that buf is properly aligned
41   * for chunky data access. This overhead is still justified because
42   * only lines tend to be tens of chars long.
43   */
44  for (; (len > 0) && ((apr_uintptr_t)buf) & (sizeof(apr_uintptr_t)-1)
45       ; ++buf, --len)
46  {
47    if (*buf == '\n' || *buf == '\r')
48      return buf;
49  }
50
51#endif
52
53  /* Scan the input one machine word at a time. */
54  for (; len > sizeof(apr_uintptr_t)
55       ; buf += sizeof(apr_uintptr_t), len -= sizeof(apr_uintptr_t))
56  {
57    /* This is a variant of the well-known strlen test: */
58    apr_uintptr_t chunk = *(const apr_uintptr_t *)buf;
59
60    /* A byte in SVN__R_TEST is \0, iff it was \r in *BUF.
61     * Similarly, SVN__N_TEST is an indicator for \n. */
62    apr_uintptr_t r_test = chunk ^ SVN__R_MASK;
63    apr_uintptr_t n_test = chunk ^ SVN__N_MASK;
64
65    /* A byte in SVN__R_TEST can by < 0x80, iff it has been \0 before
66     * (i.e. \r in *BUF). Dito for SVN__N_TEST. */
67    r_test |= (r_test & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;
68    n_test |= (n_test & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;
69
70    /* Check whether at least one of the words contains a byte <0x80
71     * (if one is detected, there was a \r or \n in CHUNK). */
72    if ((r_test & n_test & SVN__BIT_7_SET) != SVN__BIT_7_SET)
73      break;
74  }
75
76  /* The remaining odd bytes will be examined the naive way: */
77  for (; len > 0; ++buf, --len)
78    {
79      if (*buf == '\n' || *buf == '\r')
80        return buf;
81    }
82
83  return NULL;
84}
85
86const char *
87svn_eol__detect_eol(char *buf, apr_size_t len, char **eolp)
88{
89  char *eol;
90
91  eol = svn_eol__find_eol_start(buf, len);
92  if (eol)
93    {
94      if (eolp)
95        *eolp = eol;
96
97      if (*eol == '\n')
98        return "\n";
99
100      /* We found a CR. */
101      ++eol;
102      if (eol == buf + len || *eol != '\n')
103        return "\r";
104      return "\r\n";
105    }
106
107  return NULL;
108}
109