From 45cdd97e7c798fe2ca4add7d15250e7adfa1c68b Mon Sep 17 00:00:00 2001 From: chengxl Date: Thu, 7 Aug 2025 13:54:10 +0800 Subject: [PATCH] =?UTF-8?q?redis=E8=B0=83=E6=95=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- application/common/OpenAi.php | 817 +++++++++++++++++++++++++++++----- 1 file changed, 694 insertions(+), 123 deletions(-) diff --git a/application/common/OpenAi.php b/application/common/OpenAi.php index e2219b6..a4a1e8f 100644 --- a/application/common/OpenAi.php +++ b/application/common/OpenAi.php @@ -70,20 +70,19 @@ class OpenAi //公微问题模版 protected $aWechatQuestion = [ - 'system_message' => '您是一位医学期刊的医学科普转化专家,严格遵循用户要求的结构、语言和专业约束,不编造数据,不夸大结论,擅长将复杂的医学研究论文转化为适合微信公众号推送的专业科普内容。请根据提供的医学论文信息,按照以下严格格式生成结构化[JSON结构]输出[中文]:', + 'system_message' => '您是一位医学期刊的医学科普转化专家,严格遵循用户要求的结构、语言和专业约束,不编造数据,不夸大结论,擅长将复杂的医学研究论文转化为适合微信公众号推送的专业科普内容。请根据提供的医学论文信息,严格按要求生成内容[中文、可解析的JSON结构]:', 'public_message' => [ "covered" => "[列出文章涵盖的学科及研究方法,总字数不超过100字,学科和方法之间用逗号分隔,例如:肿瘤学,分子生物学,基因组测序,生物信息学分析]", "title_chinese" => "[将标题翻译成中文:内容需自然流畅、口语化、连贯性、学术性]" - // , - // "content" => "将内容翻译成中文,需自然流畅、口语化、连贯性、学术性,保留原文的章节结构和图表编号" ], 'default' => [ "digest" => "[学术规范翻译并提炼摘要,强调逻辑性、科学术语准确性和表达严谨性,采用段落形式,注意内容不要和文章内容有严重重复,总字数不超过500字]", "research_background" => "[提炼研究背景,采用连贯的段落形式,注意内容不要和文章内容有严重重复,总字数超过200字]", - "discussion_results" => "[针对文章简单总结讨论和结果,采用连贯的段落形式,注意内容不要和文章内容有严重重复,总字数超过450字]", - "research_method" => "[总结文章的研究方法,采用连贯的段落形式,注意内容不要和文章内容有严重重复,总字数超过300字]", - "prospect" => "[针对稿件内容进行展望撰写,注意内容不要和文章内容有严重重复,采用连贯的段落形式]", - "highlights" => "[总结归纳亮点,至少3点,每点用分号分隔]" + "results" => "[针对结果进行简单总结,采用连贯的段落形式,注意内容不要和文章内容有严重重复,总字数超过450字]", + "discussion" => "[针对讨论进行简单总结,采用连贯的段落形式,注意内容不要和文章内容有严重重复,总字数超过450字]", + "research_method" => "[针对研究方法进行简单总结,采用连贯的段落形式,注意内容不要和文章内容有严重重复,总字数超过300字]", + "discussion" => "[针对讨论进行简单总结,采用连贯的段落形式,注意内容不要和文章内容有严重重复,总字数超过450字]", + "conclusion" => "[针对结论进行简单总结,采用连贯的段落形式,注意内容不要和文章内容有严重重复,总字数超过450字]", ], 'review' => [ "overview" => "按照学术规范翻译并提炼文章概述,整体内容应大于1200字,其中应包含文章背景(不少于400字),其他内容提炼更强调逻辑性、科学术语准确性和表达的严谨性,注意内容不要和文章内容有严重重复,采用连贯的段落形式", @@ -130,20 +129,194 @@ class OpenAi if(empty($aSearch)){ return []; } - //文章类型 - $prompt_article_type = empty($aSearch['prompt_article_type']) ? '' : $aSearch['prompt_article_type']; - if(empty($prompt_article_type)){ + //文章类型-选择模版 + $prompt_article_type = empty($aSearch['prompt_article_type']) ? 'default' : $aSearch['prompt_article_type']; + + //判断内容是否为空 + $aMainContent = empty($aSearch['main_content']) ? [] : $aSearch['main_content']; + if(empty($aMainContent)){ return []; } - //组织参数 - if(in_array($prompt_article_type, ['Mini Review','Review'])){ - $prompt_article_type = 'review'; - }else{ - $prompt_article_type = 'default'; + + //处理文章内容 + $aMainListResult = $this->dealArticleContent($aMainContent); + $aMainList = empty($aMainListResult['main_list']) ? [] : $aMainListResult['main_list'];//文字列表 + $aMainImageList = empty($aMainListResult['main_image_list']) ? [] : $aMainListResult['main_image_list'];//图片列表 + if(empty($aMainList)){ + return []; } + //获取问题 $aQuestion = $this->aWechatQuestion; - $aQuestionLists = empty($aQuestion[$prompt_article_type]) ? [] : $aQuestion[$prompt_article_type]; + //系统角色 + $sSysMessagePrompt = empty($aQuestion['system_message']) ? '' : $aQuestion['system_message']; + if(empty($sSysMessagePrompt)){ + return []; + } + + //公共问题 + $aMessage = []; + $aPublicQuestion = empty($aQuestion['public_message']) ? [] : $aQuestion['public_message']; + if(!empty($aPublicQuestion)){ + foreach ($aPublicQuestion as $key => $value) { + //系统问题信息 + $sInfo = json_encode([$key => $value],JSON_UNESCAPED_UNICODE); + $sSysMessagePromptInfo = $sSysMessagePrompt.$sInfo; + //用户输入内容 + if($key == 'title_chinese'){ + $sUserPrompt = empty($aSearch['{#title_chinese#}']) ? '' : $aSearch['{#title_chinese#}']; + } + if($key == 'covered'){ + $sUserPrompt = empty($aSearch['{#title_chinese#}']) ? '' : $aSearch['{#title_chinese#}']; + $sUserPrompt .= empty($aSearch['{#abstract#}']) ? '' : $aSearch['{#abstract#}']; + } + $aMessage[] = [ + ['role' => 'system', 'content' => $sSysMessagePromptInfo], + ['role' => 'user', 'content' => empty($sUserPrompt) ? '' : $sUserPrompt] + ]; + } + } + if(in_array($prompt_article_type, ['Mini Review','Review'])){ + $aArticleQuestion = $this->dealReviewQuestion($aMainList,$aMainImageList); + }else{ + $aArticleQuestion = $this->dealDefaultQuestion($aMainList,$aMainImageList); + } + $aMessage = array_merge($aMessage,$aArticleQuestion); + return $aMessage; + } + /** + * 处理文章内容 + */ + private function dealArticleContent($aParam = []){ + //内容 + $aArticleMain = empty($aParam['main']) ? [] : $aParam['main']; + //内容-一级标题 + $aArticleMainH1 = empty($aParam['main_h1']) ? [] : $aParam['main_h1']; + //处理数据 + $aMainList = $aMainImageList = []; + if(empty($aArticleMain) || empty($aArticleMainH1)){ + return $aMainList; + } + //数据处理 + foreach ($aArticleMain as $key => $value) { + $sValue = trim($value['content']); + // 将HTML实体转换为Unicode字符 + $sValue = html_entity_decode($sValue, ENT_QUOTES, 'UTF-8'); + // 过滤掉所有空格字符(包括普通空格和NBSP) + $sValue = preg_replace('/\s+/', ' ', $sValue); + if(empty($sValue) || $sValue == " "){ + continue; + } + $sKey = $this->isIdInRange($value['sort'],$aArticleMainH1); + if(!empty($sKey)){ + $sKey = strtolower(str_replace(' ', '_', strip_tags($sKey))); + $value['content'] = strip_tags($value['content']); + if($value['type'] == 1){ + $aMainImageList[$sKey][] = $value['ami_id']; + continue; + } + $aMainList[$sKey][] = $value; + }else{ + $value['content'] = strip_tags($value['content']); + $aMainList['digest'][] = $value; + } + } + return ['main_list' => $aMainList,'main_image_list' => $aMainImageList]; + } + + /** + * 判断ID是否在目标区间数组的范围内 + * @param int $id 待判断的ID + * @param array $rangeMap 区间规则数组 + * @return bool|string 存在则返回所属标题,否则返回false + */ + private function isIdInRange($iSort,$rangeMap = []) { + foreach ($rangeMap as $title => $range) { + // 解析区间值(处理字符串和数字两种格式) + $rangeStr = is_string($range['range']) ? $range['range'] : (string)$range['range']; + $rangeParts = explode(',', $rangeStr); + // 单个值:表示 >= 该值 + if (count($rangeParts) == 1) { + $min = (int)$rangeParts[0]; + if ($iSort > $min) { + return $title; // 返回所属标题 + } + } + // 两个值:表示 [min, max] 闭区间(包含两端) + elseif (count($rangeParts) == 2) { + $min = (int)$rangeParts[0]; + $max = (int)$rangeParts[1]; + if ($iSort >= $min && $iSort <= $max) { + return $title; // 返回所属标题 + } + } + } + return ''; // 不在任何区间 + } + + /** + * 处理文章内容-研究结果 + */ + private function dealResearchResult($aParam = []){ + if(empty($aParam)){ + return []; + } + // 处理数据 + $result = []; + $currentH2 = null; // 当前h2信息 + $currentContent = $currentImageContent = $h2Positions = []; // 中间内容 // 存储h2的位置信息 [am_id, 索引] + foreach ($aParam as $item) { + if ($item['is_h2'] == 1) { + // 记录当前h2位置 + $h2Positions[] = $item['am_id']; + // 保存上一个h2数据 + if ($currentH2) { + // 下一个h2的am_id就是当前刚记录的(因为数组是顺序添加的) + $nextAmId = count($h2Positions) > 1 ? end($h2Positions) : null; + $result[$currentH2['content']] = [ + 'current_am_id' => $currentH2['am_id'], + 'next_am_id' => $nextAmId, + 'content_between' => $currentContent, + 'image_content_between' => $currentImageContent + ]; + } + // 更新当前h2 + $currentH2 = $item; + $currentContent = []; + $currentImageContent = []; + }elseif ($currentH2) {// 收集中间内容 + if($item['type'] == 0){ + $currentContent[] = $item['content']; + } + if($item['type'] == 1){ + $currentImageContent[] = $item['ami_id']; + } + } + } + // 处理最后一个h2 + if ($currentH2) { + $result[$currentH2['content']] = [ + 'current_am_id' => $currentH2['am_id'], + 'next_am_id' => 0, + 'content_between' => $currentContent, + 'image_content_between' => $currentImageContent + ]; + } + return $result; + } + + /** + * 组装问题【默认】 + */ + private function dealDefaultQuestion($aParam = [],$aMainImageList = []){ + + if(empty($aParam) || empty($aMainImageList)){ + return []; + } + + //获取问题 + $aQuestion = $this->aWechatQuestion; + $aQuestionLists = empty($aQuestion['default']) ? [] : $aQuestion['default']; if(empty($aQuestionLists)){ return []; } @@ -152,65 +325,129 @@ class OpenAi if(empty($sSysMessagePrompt)){ return []; } - //公共问题 - $aPublicQuestion = empty($aQuestion['public_message']) ? [] : $aQuestion['public_message']; - $aQuestion = array_merge($aPublicQuestion,$aQuestionLists); - //问题处理 + + //定义空问题 $aMessage = []; - foreach($aQuestion as $key => $value){ - //修改当前内容 - $sInfo = json_encode([$key => $value],JSON_UNESCAPED_UNICODE); + + //特殊标题强制转译 + $aField = ['introduction' => 'research_background','background' => 'research_background','methods' => 'research_method','materials_and_methods' => 'research_method']; + + //组装问题 + foreach ($aParam as $key => $value) { + //字段处理 + $key = empty($aField[$key]) ? $key : $aField[$key]; + $sQuestionInfo = empty($aQuestionLists[$key]) ? '' : $aQuestionLists[$key]; + if(empty($sQuestionInfo) || empty($value)){ + continue; + } + + //系统角色 + $sInfo = json_encode([$key => $sQuestionInfo],JSON_UNESCAPED_UNICODE); $sSysMessagePromptInfo = $sSysMessagePrompt.$sInfo; - if($key == "title_chinese"){ - $sUserPrompt = '{#title_chinese#}'; - $sUserPrompt = str_replace(array_keys($aSearch), array_values($aSearch), $sUserPrompt); + + //用户角色 + $sUserPrompt = is_array($value) ? implode('', array_column($value, 'content')) : $value ; + if($key == 'results'){ + $aResearchResult = $this->dealResearchResult($value); + if(!empty($aResearchResult)){ + foreach ($aResearchResult as $k => $val) { + $sContentBetween = empty($val['content_between']) ? '' : implode(',', $val['content_between']); + if(empty($sContentBetween)){ + continue; + } + + //用户角色 + $sUserPrompt = ['title' => strip_tags($k),'content' => $sContentBetween]; + $aUserPrompt = ['results' =>['title' => '[将标题直接翻译中文,无需针对内容总结翻译]','content' => '['.$sQuestionInfo.']']]; + + //系统角色 + $sSysMessagePromptInfo = $sSysMessagePrompt.json_encode($aUserPrompt,JSON_UNESCAPED_UNICODE); + + //组装message + $aMessage[] = [ + ['role' => 'system', 'content' => $sSysMessagePromptInfo], + ['role' => 'user', 'content' => empty($sUserPrompt) ? '' : json_encode($sUserPrompt,JSON_UNESCAPED_UNICODE)], + 'current_am_id' => empty($val['current_am_id']) ? 0 : $val['current_am_id'], + 'next_am_id' => empty($val['next_am_id']) ? 0 : $val['next_am_id'], + 'ami_id' => empty($val['image_content_between']) ? '' : implode(',', $val['image_content_between']) + ]; + } + continue; + } } - if($key == "content"){ - $sUserPrompt = '{#content#}'; - $sUserPrompt = str_replace(array_keys($aSearch), array_values($aSearch), $sUserPrompt); - } - if(!in_array($key,["title_chinese","content"])){ - $sUserPrompt = '标题:{#title_chinese#} 摘要: {#abstract#} 内容: {#content#}'; - $sUserPrompt = str_replace(array_keys($aSearch), array_values($aSearch), $sUserPrompt); - } - + //组装 $aMessage[] = [ ['role' => 'system', 'content' => $sSysMessagePromptInfo], - ['role' => 'user', 'content' => $sUserPrompt] + ['role' => 'user', 'content' => empty($sUserPrompt) ? '' : $sUserPrompt] ]; } return $aMessage; } /** - * 构建AI翻译内容-处理提示词 + * 组装问题【reviwew】 */ - public function buildTranslatePrompt($aSearch = []){ - if(empty($aSearch)){ + private function dealReviewQuestion($aParam = [],$aMainImageList = []){ + + if(empty($aParam) || empty($aMainImageList)){ return []; } - $sSysMessagePrompt = '你是一位专业的医学翻译专家,请将用户提供的内容准确、流畅地翻译成中文。翻译需自然流畅、口语化、连贯性、学术性,保留原文的专业术语和逻辑结构'; - $sUserPrompt = '"将以下内容翻译为中文,仅返回翻译结果,不要解释:\n {#content#}"'; - $sUserPrompt = str_replace(array_keys($aSearch), array_values($aSearch), $sUserPrompt); - $aMessage = [ - ['role' => 'system', 'content' => $sSysMessagePrompt], - ['role' => 'user', 'content' => $sUserPrompt] - ]; - //模型版本 - $aMessage = [ - 'model' => empty($aSearch['model']) ? 'gpt-4.1' : $aSearch['model'], - 'messages' => $aMessage, - 'temperature' => 0.2,// 降低随机性(0-1,0为最确定) - ]; - $aResult = json_decode($this->curlOpenAIStream($aMessage),true); - $sJsonData = empty($aResult['data']) ? '' : $aResult['data']; - if(empty($sJsonData)){ - return ['status' => 2,'msg' => 'Translation failed']; + + //获取问题 + $aQuestion = $this->aWechatQuestion; + $aQuestionLists = empty($aQuestion['default']) ? [] : $aQuestion['default']; + if(empty($aQuestionLists)){ + return []; } - $aJsonData = json_decode($sJsonData, true); - if (json_last_error() !== JSON_ERROR_NONE) { - return ['status' => 3,'msg' => 'JSON parsing error:'.json_last_error_msg()]; + //系统角色 + $sSysMessagePrompt = empty($aQuestion['system_message']) ? '' : $aQuestion['system_message']; + if(empty($sSysMessagePrompt)){ + return []; } - return ['status' => 1,'msg' => 'Translation successful','data' => $aJsonData]; + + //定义空问题 + $aMessage = []; + + //特殊标题强制转译 + $aField = ['introduction' => 'research_background','background' => 'research_background','methods' => 'research_method','materials_and_methods' => 'research_method']; + + //组装问题 + foreach ($aParam as $key => $value) { + //判空处理 + if(empty($value)){ + continue; + } + + //一级标题处理 + $sKey = str_replace('_', ' ', $key); + + //字段处理 + $key = empty($aField[$key]) ? $key : $aField[$key]; + //系统问题信息 + $sQuestionInfo = empty($aQuestionLists[$key]) ? '[针对content内容进行简单总结,采用连贯的段落形式,注意内容不要和文章内容有严重重复,总字数超过450字]' : $aQuestionLists[$key]; + $sInfo = json_encode([$key => $sQuestionInfo],JSON_UNESCAPED_UNICODE); + $sSysMessagePromptInfo = $sSysMessagePrompt.$sInfo; + + //用户输入内容处理 + $sUserPrompt = is_array($value) ? implode('', array_column($value, 'content')) : $value ; + if(!in_array($key, ['background','research_background','introduction','conclusion','digest'])){ + + //系统角色 + $aSysPrompt = ['results' =>['title' => '[将标题直接翻译中文,无需针对内容总结翻译]','content' => '['.$sQuestionInfo.']']]; + $sSysMessagePromptInfo = $sSysMessagePrompt.json_encode($aSysPrompt,JSON_UNESCAPED_UNICODE); + + //用户角色 + $aUserPrompt = ['title' => $sKey,'content' => is_array($value) ? implode('', array_column($value, 'content')) : $value]; + $sUserPrompt = json_encode($aUserPrompt,JSON_UNESCAPED_UNICODE); + } + //组装问题 + $aMessage[] = [ + ['role' => 'system', 'content' => $sSysMessagePromptInfo], + ['role' => 'user', 'content' => empty($sUserPrompt) ? '' : $sUserPrompt], + 'current_am_id' => empty($value[0]['am_id']) ? 0 : $value[0]['am_id'], + 'ami_id' => empty($aMainImageList[$key]) ? '' : implode(',', $aMainImageList[$key]), + ]; + } + return $aMessage; } /** @@ -221,7 +458,6 @@ class OpenAi if(empty($aSearch)){ return []; } - //获取问题 $aQuestion = $this->aReviewQuestion; $aQuestionLists = empty($aQuestion['default']) ? [] : $aQuestion['default']; @@ -276,18 +512,21 @@ class OpenAi //模型版本 $model = empty($aParam['model']) ? 'gpt-4.1' : $aParam['model']; + //接口地址 $sUrl = $this->sUrl; + // 降低随机性(0-1,0为最确定) + $iTemperature = empty($aParam['temperature']) ? '0.1' : $aParam['temperature']; + //组装数据 $data = [ 'model' => $model, 'messages' => $aMessage, - 'temperature' => 0.2,// 降低随机性(0-1,0为最确定) + 'temperature' => $iTemperature, ]; $this->curl = curl_init(); - // 通用配置 curl_setopt($this->curl, CURLOPT_URL, $sUrl); // 设置头信息 @@ -340,103 +579,398 @@ class OpenAi * @param $model 模型类型 */ public function curlOpenAIStream($aParam = []){ - //询问AI信息 + // 询问AI信息 $aMessage = empty($aParam['messages']) ? [] : $aParam['messages']; if(empty($aMessage)){ return json_encode(['status' => 2,'msg' => 'AI Q&A content not obtained']); } - //模型 - $model = empty($aParam['model']) ? 'gpt-4' : $aParam['model']; - //超时设置 - $timeout = empty($aParam['timeout']) ? 300 : $aParam['timeout']; - //接口地址 + // 模型与参数配置 + $model = empty($aParam['model']) ? 'deepseek-chat' : $aParam['model']; // 明确指定DeepSeek模型 + $timeout = empty($aParam['timeout']) ? 600 : $aParam['timeout']; // 进一步延长超时到15分钟 $sUrl = empty($aParam['url']) ? $this->sUrl : $aParam['url']; - - //组装数据 + $iTemperature = empty($aParam['temperature']) ? '0.2' : $aParam['temperature']; + // 组装数据 - 增加流式传输必要参数 $data = [ 'model' => $model, 'messages' => $aMessage, - // 'temperature' => 0.2,// 降低随机性(0-1,0为最确定) - 'temperature' => 0.6, // 中等随机性 - // 'max_tokens' => 1000, + 'temperature' => $iTemperature, 'top_p' => 0.8, 'frequency_penalty' => 0.3, 'presence_penalty' => 0.2, - 'stream' => true // 关键:启用流式传输,避免超时 + 'stream' => true, + 'stream_options' => ['include_usage' => false] // 减少额外数据传输 ]; - // Curl通用配置 + // Curl配置 - 增强流式传输兼容性 $this->curl = curl_init(); - curl_setopt($this->curl, CURLOPT_URL, $this->sUrl); + curl_setopt($this->curl, CURLOPT_URL, $sUrl); curl_setopt($this->curl, CURLOPT_HTTPHEADER, [ 'Content-Type: application/json', - 'Authorization: Bearer ' . $this->sApiKey + 'Authorization: Bearer ' . $this->sApiKey, + 'Accept: text/event-stream', + 'Cache-Control: no-cache', + 'Connection: keep-alive' // 保持长连接 ]); - // 代理与SSL配置(根据你的服务器环境调整) + + // 代理与SSL配置优化 curl_setopt($this->curl, CURLOPT_PROXY, $this->proxy); curl_setopt($this->curl, CURLOPT_SSL_VERIFYPEER, true); curl_setopt($this->curl, CURLOPT_SSL_VERIFYHOST, 2); - // 超时与传输配置 + curl_setopt($this->curl, CURLOPT_SSLVERSION, CURL_SSLVERSION_TLSv1_2); // 强制TLS版本 + + // 核心传输配置 - 解决数据截断 curl_setopt($this->curl, CURLOPT_POST, true); curl_setopt($this->curl, CURLOPT_POSTFIELDS, json_encode($data)); - curl_setopt($this->curl, CURLOPT_TIMEOUT, $timeout); // CURL超时 - curl_setopt($this->curl, CURLOPT_CONNECTTIMEOUT, 30); // 连接超时 - - // === 5. 流式响应处理(核心避免超时) === - $streamContent = ''; // 累积流式返回的内容 - // 回调函数:每收到一块数据就处理并保存,避免整段等待 - curl_setopt($this->curl, CURLOPT_WRITEFUNCTION, function ($curl, $data) use (&$streamContent) { - $streamContent .= $data; - return strlen($data); // 必须返回数据长度,否则CURL会中断 + curl_setopt($this->curl, CURLOPT_TIMEOUT, $timeout); + curl_setopt($this->curl, CURLOPT_CONNECTTIMEOUT, 30); + curl_setopt($this->curl, CURLOPT_NOSIGNAL, true); // 禁用信号处理,解决超时精度问题 + curl_setopt($this->curl, CURLOPT_BUFFERSIZE, 32); // 更小的缓冲区,更及时的处理 + curl_setopt($this->curl, CURLOPT_FRESH_CONNECT, true); + curl_setopt($this->curl, CURLOPT_FORBID_REUSE, true); // 禁止重用连接 + + // 流式响应处理 - 增加分块校验 + $streamContent = ''; + $isComplete = false; + $incompleteLine = ''; + $lastReceivedTime = time(); + $receivedChunks = 0; // 跟踪接收的分块数量 + + // 改进的回调函数 - 实时处理并校验每一块数据 + curl_setopt($this->curl, CURLOPT_WRITEFUNCTION, function ($curl, $data) use ( + &$streamContent, &$isComplete, &$incompleteLine, &$lastReceivedTime, &$receivedChunks + ) { + $lastReceivedTime = time(); + $receivedChunks++; + + // 处理跨块的不完整行(核心修复点) + $fullData = $incompleteLine . $data; + $lineEndPos = strrpos($fullData, "\n"); + + if ($lineEndPos !== false) { + // 提取完整行 + $completePart = substr($fullData, 0, $lineEndPos + 1); + $incompleteLine = substr($fullData, $lineEndPos + 1); + + $streamContent .= $completePart; + + // 检查是否包含结束标记 + if (strpos($completePart, 'data: [DONE]') !== false) { + $isComplete = true; + } + } else { + // 没有完整行,全部暂存 + $incompleteLine = $fullData; + } + + return strlen($data); // 必须返回完整长度,否则curl会中断 }); - //执行请求 + // 执行请求 $result = curl_exec($this->curl); - //获取错误信息 $curlErrno = curl_errno($this->curl); - //获取Code码 $httpCode = curl_getinfo($this->curl, CURLINFO_HTTP_CODE); - //关闭连接 + $totalReceived = curl_getinfo($this->curl, CURLINFO_SIZE_DOWNLOAD); curl_close($this->curl); - //错误处理 + // 处理最后剩余的不完整数据(关键修复) + if (!empty($incompleteLine)) { + $streamContent .= $incompleteLine; + if (strpos($incompleteLine, 'data: [DONE]') !== false) { + $isComplete = true; + } + } + + // 错误处理 - 区分不同类型的不完整情况 if (!empty($curlErrno)) { - // 超时但已有部分数据:保存进度,下次从该块重试 - if ($curlErrno == CURLE_OPERATION_TIMEDOUT && !empty($streamContent)) { + // 超时但已完成 + if ($curlErrno == CURLE_OPERATION_TIMEDOUT && $isComplete) { + $sStreamResponse = $this->parseMedicalStreamResponse($streamContent); return json_encode([ - 'status' => 3, - 'msg' => "处理超时,已保存进度", + 'status' => 1, + 'msg' => 'success', + 'data' => $sStreamResponse, + 'chunks' => $receivedChunks ]); } - // 其他错误(如网络问题) + + // 超时但有部分数据 + if ($curlErrno == CURLE_OPERATION_TIMEDOUT) { + return json_encode([ + 'status' => 3, + 'msg' => "超时,已接收{$receivedChunks}个分块", + 'partial_data' => $this->parseMedicalStreamResponse($streamContent), + 'is_complete' => $isComplete ? 1 : 0, + 'chunks' => $receivedChunks, + 'received_bytes' => $totalReceived + ]); + } + + // 其他错误 return json_encode([ 'status' => 4, - 'msg' => "OPENAI Error:".curl_error($this->curl) + 'msg' => "CURL错误: " . curl_error($this->curl), + 'error_code' => $curlErrno, + 'http_code' => $httpCode ]); } - //处理流式结果 + // HTTP状态码检查 + if ($httpCode < 200 || $httpCode >= 300) { + return json_encode([ + 'status' => 5, + 'msg' => "HTTP错误: {$httpCode}", + 'response' => $streamContent, + 'chunks' => $receivedChunks + ]); + } + + // 处理正常结果 $sStreamResponse = $this->parseMedicalStreamResponse($streamContent); - return json_encode(['status' => 1,'msg' => 'success','data' => $sStreamResponse]); + return json_encode([ + 'status' => 1, + 'msg' => 'success', + 'data' => $sStreamResponse, + 'is_complete' => $isComplete ? 1 : 0, + 'chunks' => $receivedChunks, + 'received_bytes' => $totalReceived + ]); } + /** - * 解析流式响应 + * 增强版流式响应解析 - 解决JSON片段拼接问题 */ private function parseMedicalStreamResponse($streamContent){ $fullContent = ''; $lines = explode("\n", $streamContent); + $validLines = 0; + $errorLines = 0; + foreach ($lines as $line) { $line = trim($line); - if (strpos($line, 'data: ') === 0 && $line !== 'data: [DONE]') { - $jsonStr = substr($line, 6); // 去掉"data: "前缀 + if (empty($line)) continue; + + // 处理DeepSeek的SSE格式 + if (strpos($line, 'data: ') === 0) { + // 检查结束标记 + if ($line === 'data: [DONE]') { + break; + } + + $jsonStr = substr($line, 6); $jsonData = json_decode($jsonStr, true); - $fullContent .= $jsonData['choices'][0]['delta']['content'] ?? ''; + + // 解析错误处理与修复 + if (json_last_error() !== JSON_ERROR_NONE) { + $errorLines++; + // 针对DeepSeek常见的JSON格式问题进行修复 + $jsonStr = $this->fixDeepSeekJson($jsonStr); + $jsonData = json_decode($jsonStr, true); + } + + // 提取内容(兼容DeepSeek的响应结构) + if (isset($jsonData['choices'][0]['delta']['content'])) { + $fullContent .= $jsonData['choices'][0]['delta']['content']; + $validLines++; + } elseif (isset($jsonData['choices'][0]['text'])) { + $fullContent .= $jsonData['choices'][0]['text']; + $validLines++; + } } } + + // 记录解析统计,便于调试 + error_log("流式解析: 有效行{$validLines}, 错误行{$errorLines}"); return $fullContent; } + /** + * 高性能DeepSeek JSON修复函数(终极版) + * 确保修复后的JSON字符串100%可解析,同时保持最优性能 + */ + private function fixDeepSeekJson($jsonStr) { + // 基础处理:去除首尾空白并处理空字符串(高效操作) + $jsonStr = trim($jsonStr); + if (empty($jsonStr)) { + return '{}'; + } + + // 1. 预处理:清除首尾干扰字符(减少正则使用) + $len = strlen($jsonStr); + $start = 0; + // 跳过开头的逗号和空白 + while ($start < $len && ($jsonStr[$start] === ',' || ctype_space($jsonStr[$start]))) { + $start++; + } + $end = $len - 1; + // 跳过结尾的逗号和空白 + while ($end >= $start && ($jsonStr[$end] === ',' || ctype_space($jsonStr[$end]))) { + $end--; + } + if ($start > 0 || $end < $len - 1) { + $jsonStr = substr($jsonStr, $start, $end - $start + 1); + $len = strlen($jsonStr); + // 处理截取后可能为空的情况 + if ($len === 0) { + return '{}'; + } + } + + // 2. 括号平衡修复(核心逻辑保持,减少计算) + $braceDiff = substr_count($jsonStr, '{') - substr_count($jsonStr, '}'); + if ($braceDiff !== 0) { + if ($braceDiff > 0) { + $jsonStr .= str_repeat('}', $braceDiff); + } else { + // 仅在必要时使用正则移除多余括号 + $jsonStr = preg_replace('/}(?=([^"]*"[^"]*")*[^"]*$)/', '', $jsonStr, -$braceDiff); + } + } + + $bracketDiff = substr_count($jsonStr, '[') - substr_count($jsonStr, ']'); + if ($bracketDiff !== 0) { + if ($bracketDiff > 0) { + $jsonStr .= str_repeat(']', $bracketDiff); + } else { + $jsonStr = preg_replace('/](?=([^"]*"[^"]*")*[^"]*$)/', '', $jsonStr, -$bracketDiff); + } + } + + // 3. 控制字符清理(合并为单次处理) + $jsonStr = preg_replace( + '/([\x00-\x1F\x7F]|[^\x20-\x7E\xA0-\xFF]|\\\\u001f|\\\\u0000)/', + '', + $jsonStr + ); + + // 4. 引号处理(仅在有引号时处理,减少操作) + if (strpos($jsonStr, '"') !== false) { + // 修复未转义引号(优化正则) + $jsonStr = preg_replace('/(?handleJsonError($jsonStr, $errorCode); + $attempts++; + } + + // 终极容错:如果所有尝试都失败,返回空JSON对象 + return '{}'; + } + + /** + * 根据JSON解析错误类型进行针对性修复 + */ + private function handleJsonError($jsonStr, $errorCode) { + switch ($errorCode) { + case JSON_ERROR_SYNTAX: + // 语法错误:尝试更激进的清理 + $jsonStr = preg_replace('/[^\w{}[\]":,.\s\\\]/', '', $jsonStr); + $jsonStr = preg_replace('/,\s*([}\]])/', ' $1', $jsonStr); + break; + + case JSON_ERROR_CTRL_CHAR: + // 控制字符错误:进一步清理控制字符 + $jsonStr = preg_replace('/[\x00-\x1F\x7F]/u', '', $jsonStr); + break; + + case JSON_ERROR_UTF8: + // UTF8编码错误:尝试重新编码 + $jsonStr = utf8_encode(utf8_decode($jsonStr)); + break; + + default: + // 其他错误:使用备用修复策略 + $jsonStr = $this->fallbackJsonFix($jsonStr); + } + + return $jsonStr; + } + + /** + * 备用JSON修复策略(更激进的修复方式) + * 当主修复逻辑失败时使用 + */ + private function fallbackJsonFix($jsonStr) { + // 更彻底的清理 + $jsonStr = preg_replace('/[^\w{}[\]":,.\s\\\]/u', '', $jsonStr); + + + if (!preg_match('/^[\[{]/', $jsonStr)) { + $jsonStr = '{' . $jsonStr . '}'; + } + + // 最后尝试平衡括号 + $openBrace = substr_count($jsonStr, '{'); + $closeBrace = substr_count($jsonStr, '}'); + $jsonStr .= str_repeat('}', max(0, $openBrace - $closeBrace)); + + $openBracket = substr_count($jsonStr, '['); + $closeBracket = substr_count($jsonStr, ']'); + $jsonStr .= str_repeat(']', max(0, $openBracket - $closeBracket)); + + // 确保结尾正确 + $lastChar = substr($jsonStr, -1); + if ($lastChar !== '}' && $lastChar !== ']') { + $jsonStr .= preg_match('/^\{/', $jsonStr) ? '}' : ']'; + } + + return $jsonStr; + } + /** * 微信公众号-生成公微内容(CURL) */ @@ -459,12 +993,26 @@ class OpenAi if($result == 1){ //定义空数组 foreach ($aMessage as $key => $value) { + $aParam['current_am_id'] = empty($value['current_am_id']) ? 0 : $value['current_am_id']; + $aParam['next_am_id'] = empty($value['next_am_id']) ? 0 : $value['next_am_id']; + $aParam['ami_id'] = empty($value['ami_id']) ? 0 : $value['ami_id']; + if(isset($aParam['current_am_id'])){ + unset($value['current_am_id']); + } + if(isset($aParam['next_am_id'])){ + unset($value['next_am_id']); + } + if(isset($aParam['ami_id'])){ + unset($value['ami_id']); + } $aParam['messages'] = $value; $aParam['chunkIndex'] = $key; $aParam['count_num'] = $iNum; // if($key%2 == 0){ $aParam['key_name'] = 'queue_1_completed'; Queue::push('app\api\job\createFieldForQueue@fire', $aParam, 'createFieldForQueue'); + // } + // }else{ // $aParam['url'] = $this->sAiUrl; // $aParam['key_name'] = 'queue_2_completed'; @@ -497,9 +1045,8 @@ class OpenAi if (empty($aMessage)) { return json_encode(['status' => 2, 'msg' => 'AI Q&A content not obtained']); } - //最大执行数 - $iMaxNum = empty($aParam['count_num']) ? 0 : $aParam['count_num']; //请求OPENAI + $aParam['temperature'] = '0.6'; $aResult = $this->curlOpenAIStream($aParam); //更新处理进度 $iIndex = empty($aParam['chunkIndex']) ? 0 : $aParam['chunkIndex']; @@ -514,7 +1061,13 @@ class OpenAi $aReturnData = json_decode($aResult,true); $aDataInfo =empty($aReturnData['data']) ? [] : $aReturnData['data']; $aData = empty($aDataInfo) ? [] : $this->extractAndParse($aDataInfo); + if(!empty($aData) && is_string($aData)){ + $aData = json_decode($aData,true); + } $aData = empty($aData['data']) ? [] : $aData['data']; + $aData['current_am_id'] = empty($aParam['current_am_id']) ? 0 : $aParam['current_am_id']; + $aData['next_am_id'] = empty($aParam['next_am_id']) ? 0 : $aParam['next_am_id']; + $aData['ami_id'] = empty($aParam['ami_id']) ? 0 : $aParam['ami_id']; if(!empty($aData)){//更新AI审稿记录表 if($iProgress >= 100){ $aData['is_generate'] = 1; @@ -524,24 +1077,34 @@ class OpenAi } return $aResult; } + /** * 微信公众号-更新AI生成内容 */ private function updateAiContent($aParam = []){ - //文章ID $iArticleId = empty($aParam['article_id']) ? 0 : $aParam['article_id']; if(empty($iArticleId)){ return json_encode(['status' => 2,'msg' => 'Please select the article to be modified']); } - //更新生成状态 + + //更新内容入库 $oArticle = new Article; - $aResult = json_decode($oArticle->updateAiArticle($aParam),true); + //判断是否生成 + $is_generate = empty($aParam['is_generate']) ? 2 : $aParam['is_generate']; + if($is_generate == 1 || empty($aParam['results'])){ + $aResult = json_decode($oArticle->updateAiArticle($aParam),true); + } + if(!empty($aParam['results'])){ + $aData = $aParam['results']; + $aData['current_am_id'] = empty($aParam['current_am_id']) ? 0 : $aParam['current_am_id']; + $aData['next_am_id'] = empty($aParam['next_am_id']) ? 0 : $aParam['next_am_id']; + $aData['ami_id'] = empty($aParam['ami_id']) ? 0 : $aParam['ami_id']; + $aData['article_id'] = $iArticleId; + $aResult = json_decode($oArticle->updateAiArticleResults($aData),true); + } $iStatus = empty($aResult['status']) ? 0 : $aResult['status']; $sMsg = empty($aResult['msg']) ? '更新状态失败' : $aResult['msg']; - //是否生成 - $is_generate = empty($aParam['is_generate']) ? 2 : $aParam['is_generate']; - //内容生成完成推送上传素材队列 if($is_generate == 1){ if($iStatus == 1){ @@ -587,25 +1150,33 @@ class OpenAi * @return array|object 解析后的JSON数据,失败时返回null */ public function extractAndParse($text, $assoc = true){ - - // 使用正则表达式提取JSON代码块 + // 尝试提取标准JSON代码块 preg_match('/```json\s*(\{.*?\})\s*```/s', $text, $matches); - $jsonContent = empty($matches[1]) ? '' : $matches[1]; + $jsonContent = empty($matches[1]) ? $text : $matches[1]; + + // 若未提取到,尝试宽松匹配(允许没有json标记) if (empty($jsonContent)) { - // 尝试宽松匹配(允许没有json标记) preg_match('/```\s*(\{.*?\})\s*```/s', $text, $matches); $jsonContent = empty($matches[1]) ? $text : $matches[1]; } - + + // 清理JSON内容,去除多余标记和控制字符 + $jsonContent = trim(trim($jsonContent, '```json'), '```'); + $jsonContent = preg_replace('/[\x00-\x1F\x7F]/', '', $jsonContent); // 过滤所有控制字符 + // 解析JSON $aData = json_decode($jsonContent, $assoc); - // 检查解析是否成功 + // 检查解析结果 if (json_last_error() !== JSON_ERROR_NONE) { - return ['status' => 2,'msg' => "API返回无效JSON: " . json_last_error_msg()]; + return [ + 'status' => 2, + 'msg' => "API返回无效JSON: " . json_last_error_msg() . '===============' . $jsonContent, + 'data' => null + ]; } - - return ['status' => 1,'msg' => 'success','data' => $aData]; + + return ['status' => 1, 'msg' => 'success', 'data' => $aData]; } }