讯飞听见语音转文字Python源码,这个只能 转中文和英文,免费的转换不能超过3分钟。
# -*- coding: utf-8 -*- # ☯ Author: ****** # ☯ Email : ******@****.*** # ☯ Date : 2021/06/24 20:13 import os import re import time import random import logging import datetime import requests from logging import handlers from collections import OrderedDict from urllib3 import encode_multipart_formdata # 日志模块 class LogGer(object): def __init__(self, name): os.makedirs("./log") if not os.path.exists("./log") else None # 创建日志文件文件夹 get_logger_a = logging.getLogger() get_logger_a.setLevel(logging.INFO) # 设置默认级别 formatter = logging.Formatter('%(levelname)s %(asctime)s %(filename)s[line:%(lineno)d]: %(message)s') log_file_path = './log/{}_{}.log'.format(name, time.strftime('%Y%m%d')) rotating_handler = handlers.RotatingFileHandler( log_file_path, maxBytes=20 * 1024 * 1024, backupCount=10, encoding='utf-8') rotating_handler.setFormatter(formatter) get_logger_a.addHandler(rotating_handler) stream_handler = logging.StreamHandler() stream_handler.setFormatter(formatter) get_logger_a.addHandler(stream_handler) # 过滤级别:控制台输出INFO和WARNING级别,文件只记录WARNING级别 info_filter = logging.Filter() info_filter.filter = lambda record: record.levelno < logging.WARNING # 设置过滤等级 warn_filter = logging.Filter() warn_filter.filter = lambda record: record.levelno >= logging.WARNING # stream_handler.addFilter(info_filter) rotating_handler.addFilter(warn_filter) # 爬虫主程序 class TestSpider: LogGer(str(os.path.basename(__file__))[:-3]) logging.info('system is working now.') def __init__(self): pass # 随机头 @staticmethod def random_ua(): return "Mozilla/5.0 (Windows NT {}) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{} Safari/537.36".format( random.choice([ '10.0; Win64; x64', '10.0; WOW64', '10.0', '6.2; WOW64', '6.2; Win64; x64', '6.2', '6.1', '6.1; Win64; x64', '6.1; WOW64' ]), random.choice([ '70.0.3538.16', '70.0.3538.67', '70.0.3538.97', '71.0.3578.137', '71.0.3578.30', '71.0.3578.33', '71.0.3578.80', '72.0.3626.69', '72.0.3626.7', '73.0.3683.20', '73.0.3683.68', '74.0.3729.6', '75.0.3770.140', '75.0.3770.8', '75.0.3770.90', '76.0.3809.12', '76.0.3809.126', '76.0.3809.25', '76.0.3809.68', '77.0.3865.10', '77.0.3865.40', '78.0.3904.105', '78.0.3904.11', '78.0.3904.70', '79.0.3945.16', '79.0.3945.36', '80.0.3987.106', '80.0.3987.16', '81.0.4044.138', '81.0.4044.20', '81.0.4044.69', '83.0.4103.14', '83.0.4103.39', '84.0.4147.30', '85.0.4183.38', '85.0.4183.83', '85.0.4183.87', '86.0.4240.22', '87.0.4280.20', '87.0.4280.88', '88.0.4324.27' ])) # 下载器 def requester(self, url, retry=3, **kwargs): """ :param url: 必须传入的url :param retry: 不需要自己传,自动忽视 :param kwargs: 传递需要的参数,必须“参数名=参数” :return: 返回正常访问的内容或者返回None """ try: return requests.request( url=url, method=kwargs.get('method') if kwargs.get('method') else 'get', timeout=kwargs.get('timeout') if kwargs.get('timeout') else 30, params=kwargs.get('params'), data=kwargs.get('data'), files=kwargs.get('files'), json=kwargs.get('json'), cookies=kwargs.get('cookies'), allow_redirects=True if kwargs.get('allow_redirects') in [None, True] else False, proxies={ 'http': 'http://{}'.format(kwargs.get('proxies')), 'https': 'http://{}'.format(kwargs.get('proxies')) } if kwargs.get('proxies') else None, headers=kwargs.get('headers') if kwargs.get('headers') else { 'Accept': '*/*', 'Connection': 'close', 'Accept-Encoding': 'gzip, deflate, br', 'User-Agent': self.random_ua() }, verify=False ) except Exception as e: return logging.warning('request error:%s' % e) if retry < 1 else self.requester(url, retry - 1, **kwargs) def upload(self, file_path: str, **kwargs: any) -> None or dict: c = str(kwargs.get("code")) if kwargs.get("code") and len(str(kwargs.get("code"))) == 16 else "s4Qyl0knnW8pjpDK" response = self.requester( method="post", url="https://www.iflyrec.com/AudioStreamService/v1/audios", params={"type": "whole", "folder": f"{random.randint(10000000000000000, 99999999999999999)}"}, headers={ 'Host': 'www.iflyrec.com', 'Connection': 'close', 'User-Agent': self.random_ua(), 'X-Biz-Id': 'xftj', 'Content-Type': f'multipart/form-data; boundary=----WebKitFormBoundary{c}', 'Accept': '*/*', 'Origin': 'https://www.iflyrec.com', 'Referer': 'https://www.iflyrec.com/html/addMachineOrder.html', 'Accept-Encoding': 'gzip, deflate', 'Accept-Language': 'zh-CN,zh;q=0.9' }, data=encode_multipart_formdata(OrderedDict([ ("language", (None, 1, 'multipart/form-data')), ("id", (None, 'WU_FILE_0', 'multipart/form-data')), ('name', (None, os.path.basename(file_path), 'multipart/form-data')), ('type', (None, 'audio/wav', 'multipart/form-data')), ('lastModifiedDate', ( None, datetime.datetime.utcnow().strftime('%a %b %d %Y %H:%M:%S GMT+0800') + " (中国标准时间)", 'multipart/form-data' )), ('size', (None, os.path.getsize(file_path), 'multipart/form-data')), ('file', (os.path.basename(file_path), open(file_path, "rb").read(), 'audio/wav')), ]), boundary=f'----WebKitFormBoundary{c}')[0], proxies=kwargs.get('proxies') ) # request error if response is None: return logging.warning(f"上传失败了,请测试网络连接情况,file:{file_path}") # response error if "uploadedSize" not in response.text: return logging.warning(f"上传响应结果中不不包含关键字,file:{file_path}") elif response.json().get('biz').get('uploadedSize') is None: return logging.warning(f"上传响应中关键值缺失,file:{file_path}") return response.json() def crc32check(self, fileid: str, crc32: str) -> None or bool: response = self.requester( url=f"https://www.iflyrec.com/TranscriptOrderService/v1/tempAudios/{fileid}/initAudioInfo?crc32={crc32}", method="post", headers={ 'Host': 'www.iflyrec.com', 'Connection': 'close', 'Accept': 'application/json, text/javascript, */*; q=0.01', 'X-Requested-With': 'XMLHttpRequest', 'User-Agent': self.random_ua(), 'X-Biz-Id': 'xftj', 'Origin': 'https://www.iflyrec.com', 'Referer': 'https://www.iflyrec.com/html/addMachineOrder.html', 'Accept-Encoding': 'gzip, deflate', 'Accept-Language': 'zh-CN,zh;q=0.9', }) if response is None: return logging.warning("校验失败:网络不通...") return True if "000000" in response.text else False def distinguish(self, path, mode=False): response = self.requester( method="get" if mode else "post", url="https://www.iflyrec.com/TranscriptPreviewService/v1/aiTranscriptPreviews", headers={ 'Host': 'www.iflyrec.com', 'Connection': 'close', 'Accept': 'application/json', 'X-Requested-With': 'XMLHttpRequest', 'User-Agent': "Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36", 'X-Biz-Id': 'xftj', 'Content-Type': 'application/json;charset=UTF-8', 'Origin': 'https://www.iflyrec.com', 'Referer': 'https://www.iflyrec.com/html/addMachineOrder.html', 'Accept-Encoding': 'gzip, deflate', 'Accept-Language': 'zh-CN,zh;q=0.9' }, params={"filePath": path, "transcriptLanguage": 1}, json={"filePath": path, "transcriptLanguage": 1} if mode is False else None, ) if response is None: return logging.warning("识别失败:网络故障") if mode is False and "true" in response.text: return self.distinguish(path, True) if mode is True and "speaker" in response.text: info = response.json()['biz']['transcriptResult'] return "".join([i.strip() for i in re.findall(r'''"content":"([\S\s]*?)"''', info)]) else: return logging.warning("识别失败:未知错误") @staticmethod def write2txt(path, content): with open(path, "a+", encoding="utf-8") as f: f.write(content + "\n") # 运行入口 def run(self): # 提交数据 res = self.upload(r"C:\Users\Administrator\Desktop\英语\4.wav") if res is None: return # crc32 验证 msg = self.crc32check(**{"fileid": res['biz']['fileId'], "crc32": res['biz']['crc32']}) if msg is not True: return # 开始识别 text = self.distinguish(res['biz']['transPreviewPath']) print(f"获取响应结果:{text}") if __name__ == '__main__': start = TestSpider() start.run()