1/* 2 * similarity.c: Utility functions for finding similar strings in lists 3 * 4 * ==================================================================== 5 * Licensed to the Apache Software Foundation (ASF) under one 6 * or more contributor license agreements. See the NOTICE file 7 * distributed with this work for additional information 8 * regarding copyright ownership. The ASF licenses this file 9 * to you under the Apache License, Version 2.0 (the 10 * "License"); you may not use this file except in compliance 11 * with the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, 16 * software distributed under the License is distributed on an 17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 18 * KIND, either express or implied. See the License for the 19 * specific language governing permissions and limitations 20 * under the License. 21 * ==================================================================== 22 */ 23 24/* ==================================================================== */ 25 26 27 28/*** Includes. ***/ 29 30#include <stdlib.h> 31 32#include "svn_string.h" 33#include "cl.h" 34 35#include "private/svn_string_private.h" 36 37#include "svn_private_config.h" 38 39 40/* Context for token similarity checking */ 41struct svn_cl__simcheck_context_t 42{ 43 svn_string_t key; /* The token we're comparing with */ 44 svn_membuf_t buffer; /* Buffer for similarity testing */ 45}; 46 47 48/* Similarity test between two property names */ 49static APR_INLINE apr_size_t 50simcheck_key_diff(const svn_string_t *key, const svn_string_t *ctx, 51 svn_membuf_t *buffer, apr_size_t *diff) 52{ 53 apr_size_t lcs; 54 const apr_size_t score = svn_string__similarity(key, ctx, buffer, &lcs); 55 if (key->len > ctx->len) 56 *diff = key->len - lcs; 57 else 58 *diff = ctx->len - lcs; 59 return score; 60} 61 62 63/* Key comparator for qsort for svn_cl__simcheck_t */ 64static int 65simcheck_compare(const void *pkeya, const void *pkeyb) 66{ 67 svn_cl__simcheck_t *const keya = *(svn_cl__simcheck_t *const *)pkeya; 68 svn_cl__simcheck_t *const keyb = *(svn_cl__simcheck_t *const *)pkeyb; 69 svn_cl__simcheck_context_t *const context = keya->context; 70 71 if (keya->score == -1) 72 keya->score = simcheck_key_diff(&keya->token, &context->key, 73 &context->buffer, &keya->diff); 74 if (keyb->score == -1) 75 keyb->score = simcheck_key_diff(&keyb->token, &context->key, 76 &context->buffer, &keyb->diff); 77 78 return (keya->score < keyb->score ? 1 79 : (keya->score > keyb->score ? -1 80 : (keya->diff > keyb->diff ? 1 81 : (keya->diff < keyb->diff ? -1 : 0)))); 82} 83 84apr_size_t 85svn_cl__similarity_check(const char *key, 86 svn_cl__simcheck_t **tokens, 87 apr_size_t token_count, 88 apr_pool_t *scratch_pool) 89{ 90 apr_size_t result; 91 apr_size_t i; 92 93 svn_cl__simcheck_context_t context; 94 context.key.data = key; 95 context.key.len = strlen(key); 96 svn_membuf__create(&context.buffer, 0, scratch_pool); 97 98 /* Populate the score, diff and context members. */ 99 for (i = 0; i < token_count; ++i) 100 { 101 svn_cl__simcheck_t *const token = tokens[i]; 102 token->score = -1; 103 token->diff = 0; 104 token->context = &context; 105 } 106 107 /* Sort the tokens by similarity. */ 108 qsort(tokens, token_count, sizeof(*tokens), simcheck_compare); 109 110 /* Remove references to the context, since it points to the stack, 111 and calculate the number of results that are at least two-thirds 112 similar to the key. */ 113 for (i = 0, result = 1; i < token_count; ++i) 114 { 115 svn_cl__simcheck_t *const token = tokens[i]; 116 token->context = NULL; 117 /* If you update this factor, consider updating 118 * ../libsvn_subr/cmdline.c:most_similar(). */ 119 if (token->score >= (2 * SVN_STRING__SIM_RANGE_MAX + 1) / 3) 120 ++result; 121 } 122 123 if (0 == tokens[0]->diff) 124 return 0; /* We found an exact match. */ 125 return result; 126} 127