1289177Speter/* 2289177Speter * similarity.c: Utility functions for finding similar strings in lists 3289177Speter * 4289177Speter * ==================================================================== 5289177Speter * Licensed to the Apache Software Foundation (ASF) under one 6289177Speter * or more contributor license agreements. See the NOTICE file 7289177Speter * distributed with this work for additional information 8289177Speter * regarding copyright ownership. The ASF licenses this file 9289177Speter * to you under the Apache License, Version 2.0 (the 10289177Speter * "License"); you may not use this file except in compliance 11289177Speter * with the License. You may obtain a copy of the License at 12289177Speter * 13289177Speter * http://www.apache.org/licenses/LICENSE-2.0 14289177Speter * 15289177Speter * Unless required by applicable law or agreed to in writing, 16289177Speter * software distributed under the License is distributed on an 17289177Speter * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 18289177Speter * KIND, either express or implied. See the License for the 19289177Speter * specific language governing permissions and limitations 20289177Speter * under the License. 21289177Speter * ==================================================================== 22289177Speter */ 23289177Speter 24289177Speter/* ==================================================================== */ 25289177Speter 26289177Speter 27289177Speter 28289177Speter/*** Includes. ***/ 29289177Speter 30289177Speter#include <stdlib.h> 31289177Speter 32289177Speter#include "svn_string.h" 33289177Speter#include "cl.h" 34289177Speter 35289177Speter#include "private/svn_string_private.h" 36289177Speter 37289177Speter#include "svn_private_config.h" 38289177Speter 39289177Speter 40289177Speter/* Context for token similarity checking */ 41289177Speterstruct svn_cl__simcheck_context_t 42289177Speter{ 43289177Speter svn_string_t key; /* The token we're comparing with */ 44289177Speter svn_membuf_t buffer; /* Buffer for similarity testing */ 45289177Speter}; 46289177Speter 47289177Speter 48289177Speter/* Similarity test between two property names */ 49289177Speterstatic APR_INLINE apr_size_t 50289177Spetersimcheck_key_diff(const svn_string_t *key, const svn_string_t *ctx, 51289177Speter svn_membuf_t *buffer, apr_size_t *diff) 52289177Speter{ 53289177Speter apr_size_t lcs; 54289177Speter const apr_size_t score = svn_string__similarity(key, ctx, buffer, &lcs); 55289177Speter if (key->len > ctx->len) 56289177Speter *diff = key->len - lcs; 57289177Speter else 58289177Speter *diff = ctx->len - lcs; 59289177Speter return score; 60289177Speter} 61289177Speter 62289177Speter 63289177Speter/* Key comparator for qsort for svn_cl__simcheck_t */ 64289177Speterstatic int 65289177Spetersimcheck_compare(const void *pkeya, const void *pkeyb) 66289177Speter{ 67289177Speter svn_cl__simcheck_t *const keya = *(svn_cl__simcheck_t *const *)pkeya; 68289177Speter svn_cl__simcheck_t *const keyb = *(svn_cl__simcheck_t *const *)pkeyb; 69289177Speter svn_cl__simcheck_context_t *const context = keya->context; 70289177Speter 71289177Speter if (keya->score == -1) 72289177Speter keya->score = simcheck_key_diff(&keya->token, &context->key, 73289177Speter &context->buffer, &keya->diff); 74289177Speter if (keyb->score == -1) 75289177Speter keyb->score = simcheck_key_diff(&keyb->token, &context->key, 76289177Speter &context->buffer, &keyb->diff); 77289177Speter 78289177Speter return (keya->score < keyb->score ? 1 79289177Speter : (keya->score > keyb->score ? -1 80289177Speter : (keya->diff > keyb->diff ? 1 81289177Speter : (keya->diff < keyb->diff ? -1 : 0)))); 82289177Speter} 83289177Speter 84289177Speterapr_size_t 85289177Spetersvn_cl__similarity_check(const char *key, 86289177Speter svn_cl__simcheck_t **tokens, 87289177Speter apr_size_t token_count, 88289177Speter apr_pool_t *scratch_pool) 89289177Speter{ 90289177Speter apr_size_t result; 91289177Speter apr_size_t i; 92289177Speter 93289177Speter svn_cl__simcheck_context_t context; 94289177Speter context.key.data = key; 95289177Speter context.key.len = strlen(key); 96289177Speter svn_membuf__create(&context.buffer, 0, scratch_pool); 97289177Speter 98289177Speter /* Populate the score, diff and context members. */ 99289177Speter for (i = 0; i < token_count; ++i) 100289177Speter { 101289177Speter svn_cl__simcheck_t *const token = tokens[i]; 102289177Speter token->score = -1; 103289177Speter token->diff = 0; 104289177Speter token->context = &context; 105289177Speter } 106289177Speter 107289177Speter /* Sort the tokens by similarity. */ 108289177Speter qsort(tokens, token_count, sizeof(*tokens), simcheck_compare); 109289177Speter 110289177Speter /* Remove references to the context, since it points to the stack, 111289177Speter and calculate the number of results that are at least two-thirds 112289177Speter similar to the key. */ 113289177Speter for (i = 0, result = 1; i < token_count; ++i) 114289177Speter { 115289177Speter svn_cl__simcheck_t *const token = tokens[i]; 116289177Speter token->context = NULL; 117289177Speter /* If you update this factor, consider updating 118289177Speter * ../libsvn_subr/cmdline.c:most_similar(). */ 119289177Speter if (token->score >= (2 * SVN_STRING__SIM_RANGE_MAX + 1) / 3) 120289177Speter ++result; 121289177Speter } 122289177Speter 123289177Speter if (0 == tokens[0]->diff) 124289177Speter return 0; /* We found an exact match. */ 125289177Speter return result; 126289177Speter} 127