你的位置:首页 > ASP.net教程

[ASP.net教程]Levenshtein Distance算法(编辑距离算法)


using System;using System.Collections.Generic;/* * 作者:熊仔其人 * 时间:2014年4月22日 */namespace DataTool{  /// <summary>  /// 相似度  /// 熊仔其人  /// 2014年4月22日  /// </summary>  public static class LevenshteinDistance  {    #region Levenshtein Distance算法(编辑距离算法)    /// <summary>    /// 三个数字中取最小的一个数字    /// </summary>    /// <param name="first"></param>    /// <param name="second"></param>    /// <param name="third"></param>    /// <returns></returns>    private static int LowerOfThree(int first, int second, int third)    {      int min = first;      if (second < min)        min = second;      if (third < min)        min = third;      return min;    }    /// <summary>    /// 根据Levenshtein Distance算法(编辑距离算法)计算两个字符串的相似度    /// </summary>    /// <param name="text1"></param>    /// <param name="text2"></param>    /// <returns></returns>    private static int Levenshtein_Distance(string text1, string text2)    {      int[,] Matrix;      int n = text1.Length;      int m = text2.Length;      int temp = 0;      char ch1;      char ch2;      int i = 0;      int j = 0;      if (n == 0)      {        return m;      }      if (m == 0)      {        return n;      }      Matrix = new int[n + 1, m + 1];      for (i = 0; i <= n; i++)      {        //初始化第一列        Matrix[i, 0] = i;      }      for (j = 0; j <= m; j++)      {        //初始化第一行        Matrix[0, j] = j;      }      for (i = 1; i <= n; i++)      {        ch1 = text1[i - 1];        for (j = 1; j <= m; j++)        {          ch2 = text2[j - 1];          if (ch1.Equals(ch2))          {            temp = 0;          }          else          {            temp = 1;          }          Matrix[i, j] = LowerOfThree(Matrix[i - 1, j] + 1, Matrix[i, j - 1] + 1, Matrix[i - 1, j - 1] + temp);        }      }      //for (i = 0; i <= n; i++)      //{      //  for (j = 0; j <= m; j++)      //  {      //    Console.Write(" {0} ", Matrix[i, j]);      //  }      //  Console.WriteLine("");      //}      return Matrix[n, m];    }    /// <summary>    /// 根据Levenshtein Distance算法(编辑距离算法)计算两个字符串的相似度(百分比)    /// </summary>    /// <param name="text1">第一个字符串</param>    /// <param name="text2">第二个字符串</param>    /// <returns>相似度(百分比)</returns>    public static decimal LevenshteinDistancePercent(string text1, string text2)    {      if (string.IsNullOrEmpty(text1) && string.IsNullOrEmpty(text2))        return 1;      else if (string.IsNullOrEmpty(text1) || string.IsNullOrEmpty(text2))        return 0;      int maxLenth = text1.Length > text2.Length ? text1.Length : text2.Length;      int val = Levenshtein_Distance(text1, text2);      return 1 - (decimal)val / maxLenth;    }    #endregion    #region 计算两个字符串的相似度(百分比)    /// <summary>    /// 计算两个字符串的相似度(百分比),比较每一个字符组成,返回结果相似度与字符顺序有关,但是并不需要顺序完全一致    /// </summary>    /// <param name="text1">第一个字符串</param>    /// <param name="text2">第二个字符串</param>    /// <returns>相似度(百分比)</returns>    public static decimal SimilarByStringPercent(string text1, string text2)    {      if (string.IsNullOrEmpty(text1) && string.IsNullOrEmpty(text2))        return 1;      else if (string.IsNullOrEmpty(text1) || string.IsNullOrEmpty(text2))        return 0;      decimal returnValue = 0;      int maxLength;      int i, l;      List<string> tb1 = new List<string>();      List<string> tb2 = new List<string>();      i = 0;      l = 1;      maxLength = text1.Length;      if (text1.Length < text2.Length)        maxLength = text2.Length;      while (l <= text1.Length)      {        while (i < text1.Length - 1)        {          if (i + l > text1.Length)            break;          tb1.Add(text1.Substring(i, l));          i++;        }        i = 0;        l++;      }      i = 0;      l = 1;      while (l <= text2.Length)      {        while (i < text2.Length - 1)        {          if (i + l > text2.Length)            break;          tb2.Add(text2.Substring(i, l));          i++;        }        i = 0;        l++;      }      foreach (string subStr in tb1)      {        decimal tempRe = 0;        if (tb2.Contains(subStr))        {          tempRe = (decimal)subStr.Length / maxLength;          if (tempRe > returnValue)            returnValue = tempRe;          if (tempRe == 1)            break;        }      }      return returnValue;    }    #endregion  }}