2019独角兽企业重金招聘Python工程师标准>>>
using System;
using System.Collections.Generic;using System.Text;
using System.Net;
using System.IO;
using Newtonsoft.Json;
using System.Web;
using System.Text.RegularExpressions;

namespace Framework
{
    /// <summary>
    /// Language codes accepted by <see cref="Translater"/>.
    /// </summary>
    public class LanguageType
    {
        /// <summary>
        /// Chinese.
        /// </summary>
        public static string Chinese = "zh-cn";

        /// <summary>
        /// English.
        /// </summary>
        public static string English = "en";
    }

    /// <summary>
    /// Identifiers of the translation back ends accepted by <see cref="Translater.Translate"/>.
    /// </summary>
    public class TranslationType
    {
        /// <summary>
        /// Google.
        /// </summary>
        public static string Google = "GoogleTanslater";

        /// <summary>
        /// Bing (Microsoft Translator).
        /// </summary>
        public static string Bing = "MircsoftTanslater";
    }

    /// <summary>
    /// Translates text by calling a remote translation service.
    /// </summary>
    public class Translater
    {
        // NOTE(review): the three endpoint URLs below were missing from the source this file
        // was recovered from (only the query-string tails survived). They are reconstructed
        // from the historical services these calls target - confirm before relying on them.
        private const string GoogleAjaxEndpoint = "http://ajax.googleapis.com/ajax/services/language/translate";
        private const string GoogleTranslatePostUrl = "http://translate.google.com/translate_t";
        private const string MicrosoftTranslateEndpoint = "http://api.microsofttranslator.com/v2/Http.svc/Translate";

        // NOTE(review): credential committed to source; consider moving it to configuration.
        private const string MicrosoftAppId = "56E164FED4017D272E06AD7E16778536251CA5CB";

        // Matches the element whose id is "result_box", including nested elements that use the
        // same tag name (balancing group "Nested"), so the whole translated fragment is captured.
        private static readonly Regex ResultBoxRegex = new Regex(
            @"<(?<HtmlTag>[\w]+)[^>]*\s[iI][dD]=(?<Quote>[""']?)result_box(?(Quote)\k<Quote>)[""']?[^>]*>((?<Nested><\k<HtmlTag>[^>]*>)|</\k<HtmlTag>>(?<-Nested>)|.*?)*</\k<HtmlTag>>");

        /// <summary>
        /// Translates <paramref name="sourceText"/> with the selected back end.
        /// Language codes: Chinese is "zh-cn", English is "en".
        /// </summary>
        /// <param name="sourceText">Text to translate.</param>
        /// <param name="fromLanguage">Source language code.</param>
        /// <param name="toLanguage">Target language code.</param>
        /// <param name="type">Back end: "MircsoftTanslater" or "GoogleTanslater".</param>
        /// <returns>The translated text, or an empty string when <paramref name="type"/> is not recognised.</returns>
        public static string Translate(string sourceText, string fromLanguage, string toLanguage, string type = "MircsoftTanslater")
        {
            switch (type)
            {
                case "MircsoftTanslater":
                    return MircsoftTanslater(sourceText, fromLanguage, toLanguage);
                case "GoogleTanslater":
                    return GoogleTranslater_PostMethod(sourceText, fromLanguage, toLanguage);
                default:
                    return string.Empty;
            }
        }

        /// <summary>
        /// Returns true when <paramref name="languageCode"/> is "zh-cn" (case-insensitive).
        /// </summary>
        private static bool IsChinese(string languageCode)
        {
            return string.Equals(languageCode, "zh-cn", StringComparison.OrdinalIgnoreCase);
        }

        #region Google translation: GET request

        /// <summary>
        /// Google translation through a GET request that returns JSON.
        /// </summary>
        /// <param name="sourceText">Text to translate.</param>
        /// <param name="fromType">Source language code; "zh-cn" selects Chinese to English, anything else English to Chinese.</param>
        /// <param name="toType">Not used; the direction is derived from <paramref name="fromType"/> alone.</param>
        /// <returns>
        /// The translated text when the service reports status "200", otherwise the reported
        /// status, or "err:" followed by the exception message when the call fails.
        /// </returns>
        private static string GoogleTranslater_GetMethod(string sourceText, string fromType, string toType)
        {
            string langPair = IsChinese(fromType) ? "zh|en" : "en|zh";
            string url = GoogleAjaxEndpoint + "?v=1.0&langpair=" + HttpUtility.UrlEncode(langPair) + "&q=" + HttpUtility.UrlEncode(sourceText);

            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            request.Method = "GET";
            request.Referer = "";

            try
            {
                string responseStr;
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                using (Stream responseStream = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(responseStream, Encoding.UTF8))
                {
                    responseStr = reader.ReadToEnd();
                }

                // NOTE(review): JavaScriptConvert is kept as the original wrote it; current
                // Json.NET releases expose this as JsonConvert.DeserializeObject<T> - confirm
                // against the Json.NET version this project references.
                ResponseResult readConfig = (ResponseResult)JavaScriptConvert.DeserializeObject(responseStr, typeof(ResponseResult));

                if (readConfig.responseStatus == "200")
                {
                    return readConfig.responseData.translatedText;
                }

                return readConfig.responseStatus;
            }
            catch (Exception ex)
            {
                return "err:" + ex.Message;
            }
        }

        #endregion

        #region Google translation: POST request

        /// <summary>
        /// Google translation through a form POST; the translation is scraped out of the
        /// returned HTML page (the element with id "result_box").
        /// </summary>
        /// <param name="sourceText">Text to translate.</param>
        /// <param name="fromType">Source language code; "zh-cn" maps to "zh", anything else to "en".</param>
        /// <param name="toType">Target language code; "zh-cn" maps to "zh", anything else to "en".</param>
        /// <returns>
        /// The translated plain text; an empty string when the page has no result box;
        /// "err:" followed by the exception message when the request fails without a response.
        /// </returns>
        private static string GoogleTranslater_PostMethod(string sourceText, string fromType, string toType)
        {
            string fromLan = IsChinese(fromType) ? "zh" : "en";
            string toLan = IsChinese(toType) ? "zh" : "en";

            Encoding formEncoding = Encoding.UTF8;
            StringBuilder postContent = new StringBuilder();
            AppendFormField(postContent, "hl", "en", formEncoding);
            AppendFormField(postContent, "ie", "UTF-8", formEncoding);
            AppendFormField(postContent, "sl", fromLan, formEncoding);
            AppendFormField(postContent, "text", sourceText, formEncoding);
            AppendFormField(postContent, "tl", toLan, formEncoding);

            // URL-encoded form data is pure ASCII, so this conversion is lossless.
            byte[] data = Encoding.ASCII.GetBytes(postContent.ToString());

            HttpWebRequest requestScore = (HttpWebRequest)WebRequest.Create(GoogleTranslatePostUrl);
            requestScore.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)";
            requestScore.Method = "POST";
            // NOTE(review): no Content-Type is set; the original had it commented out
            // ("application/x-www-form-urlencoded;charset=gb2312"). Confirm whether the
            // target service needs it.
            requestScore.ContentLength = data.Length;
            requestScore.KeepAlive = true;
            requestScore.Timeout = 6 * 60 * 1000;
            requestScore.ProtocolVersion = HttpVersion.Version10;

            string content;
            try
            {
                ServicePointManager.Expect100Continue = false;

                using (Stream requestStream = requestScore.GetRequestStream())
                {
                    requestStream.Write(data, 0, data.Length);
                }

                using (HttpWebResponse response = (HttpWebResponse)requestScore.GetResponse())
                using (StreamReader reader = new StreamReader(response.GetResponseStream()))
                {
                    content = reader.ReadToEnd();
                }
            }
            catch (WebException ex)
            {
                // Response is null when the failure happened before any reply arrived
                // (name resolution, connection, timeout), so there is no error page to read.
                if (ex.Response == null)
                {
                    return "err:" + ex.Message;
                }

                // Read the error page the site sent back; it is scraped like a normal reply.
                using (WebResponse errorResponse = ex.Response)
                using (StreamReader reader = new StreamReader(errorResponse.GetResponseStream()))
                {
                    content = reader.ReadToEnd();
                }
            }
            finally
            {
                requestScore.Abort();
            }

            Match resultBox = ResultBoxRegex.Match(content);
            if (!resultBox.Success)
            {
                return string.Empty;
            }

            return ConvertHtmlToText(resultBox.Value);
        }

        /// <summary>
        /// Appends one URL-encoded "name=value" pair to <paramref name="form"/>, preceded by
        /// "&amp;" when the form already has content.
        /// </summary>
        private static void AppendFormField(StringBuilder form, string name, string value, Encoding encoding)
        {
            if (form.Length > 0)
            {
                form.Append("&");
            }

            form.Append(HttpUtility.UrlEncode(name, encoding));
            form.Append("=");
            form.Append(HttpUtility.UrlEncode(value, encoding));
        }

        /// <summary>
        /// Converts an HTML fragment to plain text.
        /// </summary>
        /// <remarks>
        /// Steps, in order: replace carriage returns, line feeds and tabs with spaces; drop the
        /// head, script and style sections; turn td into a space, br/li into "\r" and tr/p into
        /// "\r\r"; strip every remaining tag; decode the amp, nbsp, lt and gt entities and
        /// delete other short entities; collapse runs of spaces and blank lines.
        /// </remarks>
        /// <param name="source">HTML to convert.</param>
        /// <returns>The plain text; an empty string when <paramref name="source"/> is null or empty.</returns>
        public static string ConvertHtmlToText(string source)
        {
            if (string.IsNullOrEmpty(source))
            {
                return string.Empty;
            }

            // Line breaks and tabs carry no meaning in HTML, so flatten them to spaces.
            string result = source.Replace("\r", " ");
            result = result.Replace("\n", " ");
            result = result.Replace("\t", " ");

            // Remove the header.
            result = Regex.Replace(result, "(<head>).*(</head>)", string.Empty, RegexOptions.IgnoreCase);

            // Normalise the opening tag (drop attributes) so the section can be matched, then remove it.
            result = Regex.Replace(result, @"<( )*script([^>])*>", "<script>", RegexOptions.IgnoreCase);
            result = Regex.Replace(result, @"(<script>).*(</script>)", string.Empty, RegexOptions.IgnoreCase);
            result = Regex.Replace(result, @"<( )*style([^>])*>", "<style>", RegexOptions.IgnoreCase);
            result = Regex.Replace(result, "(<style>).*(</style>)", string.Empty, RegexOptions.IgnoreCase);

            // Table cells become a space.
            result = Regex.Replace(result, @"<( )*td([^>])*>", " ", RegexOptions.IgnoreCase);

            // <br> and <li> become a line break.
            result = Regex.Replace(result, @"<( )*br( )*>", "\r", RegexOptions.IgnoreCase);
            result = Regex.Replace(result, @"<( )*li( )*>", "\r", RegexOptions.IgnoreCase);

            // <tr> and <p> become a paragraph break.
            result = Regex.Replace(result, @"<( )*tr([^>])*>", "\r\r", RegexOptions.IgnoreCase);
            result = Regex.Replace(result, @"<( )*p([^>])*>", "\r\r", RegexOptions.IgnoreCase);

            // Remove anything still enclosed in < >.
            result = Regex.Replace(result, @"<[^>]*>", string.Empty, RegexOptions.IgnoreCase);

            // Decode the common entities, then delete any other short entity.
            result = Regex.Replace(result, @"&amp;", "&", RegexOptions.IgnoreCase);
            result = Regex.Replace(result, @"&nbsp;", " ", RegexOptions.IgnoreCase);
            result = Regex.Replace(result, @"&lt;", "<", RegexOptions.IgnoreCase);
            result = Regex.Replace(result, @"&gt;", ">", RegexOptions.IgnoreCase);
            result = Regex.Replace(result, @"&(.{2,6});", string.Empty, RegexOptions.IgnoreCase);

            // Collapse extra spaces and blank lines.
            result = Regex.Replace(result, @" ( )+", " ");
            result = Regex.Replace(result, "(\r)( )+(\r)", "\r\r");
            result = Regex.Replace(result, "(\r\r)+", "\r\n");

            return result;
        }

        #endregion

        #region Microsoft translation

        /// <summary>
        /// Translates text with the Microsoft Translator HTTP API. Language codes: "zh-cn", "en".
        /// </summary>
        /// <param name="orgStr">Text to translate.</param>
        /// <param name="fromType">Source language code.</param>
        /// <param name="toType">Target language code.</param>
        /// <returns>
        /// The translated text with the enclosing string element removed, or a description of
        /// the failure when the request throws a <see cref="WebException"/>.
        /// </returns>
        public static string MircsoftTanslater(string orgStr, string fromType, string toType)
        {
            string uri = MicrosoftTranslateEndpoint
                + "?appId=" + MicrosoftAppId
                + "&text=" + HttpUtility.UrlEncode(orgStr)
                + "&from=" + HttpUtility.UrlEncode(fromType)
                + "&to=" + HttpUtility.UrlEncode(toType);

            HttpWebRequest httpWebRequest = (HttpWebRequest)WebRequest.Create(uri);

            try
            {
                string content;
                using (WebResponse response = httpWebRequest.GetResponse())
                using (StreamReader reader = new StreamReader(response.GetResponseStream()))
                {
                    content = reader.ReadToEnd();
                }

                // The reply is the translation wrapped in a single string element whose opening
                // tag carries an xmlns attribute; strip the wrapper whatever that attribute holds.
                content = Regex.Replace(content, @"<string\b[^>]*>", string.Empty);
                content = content.Replace("</string>", "");
                return content;
            }
            catch (WebException e)
            {
                return ProcessWebException(e, "Failed to translate");
            }
        }

        /// <summary>
        /// Builds a readable description of a failed web request.
        /// </summary>
        /// <param name="e">The exception raised by the request.</param>
        /// <param name="message">Context text used when the exception carries no HTTP response.</param>
        /// <returns>
        /// "Http status code=..., error message=..." built from <c>e.Status</c> and the response
        /// body when an HTTP response is attached; otherwise "<paramref name="message"/>: exception text".
        /// </returns>
        private static string ProcessWebException(WebException e, string message)
        {
            // No HTTP response is attached when the request failed before a reply arrived.
            HttpWebResponse response = e.Response as HttpWebResponse;
            if (response == null)
            {
                return string.Format("{0}: {1}", message, e.ToString());
            }

            string strResponse;
            using (response)
            using (Stream responseStream = response.GetResponseStream())
            using (StreamReader sr = new StreamReader(responseStream, Encoding.ASCII))
            {
                strResponse = sr.ReadToEnd();
            }

            // e.Status is the WebExceptionStatus value, not the numeric HTTP status code.
            return string.Format("Http status code={0}, error message={1}", e.Status, strResponse);
        }

        #endregion
    }

    /// <summary>
    /// Shape of the JSON reply deserialized by the Google GET translation call.
    /// </summary>
    public class ResponseResult
    {
        /// <summary>Payload holding the translated text.</summary>
        public ResponseData responseData { get; set; }

        /// <summary>Detail text supplied with the reply.</summary>
        public string responseDetails { get; set; }

        /// <summary>Status reported by the service; "200" is treated as success.</summary>
        public string responseStatus { get; set; }
    }

    /// <summary>
    /// Payload of <see cref="ResponseResult"/>.
    /// </summary>
    public class ResponseData
    {
        /// <summary>The translated text.</summary>
        public string translatedText { get; set; }
    }
}
转载于:
本文发布于:2024-02-01 22:20:01,感谢您对本站的认可!
本文链接:https://www.4u4v.net/it/170679720339799.html
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。
留言与评论(共有 0 条评论) |