jieba-php
jieba-php copied to clipboard
array_merge() 的第二个参数 $combinedWeights 不是一个数组。
// 源代码
'default']); JiebaAnalyse::init(); // 获取输入并清洗(兼容PHP 7.2数组语法) $ocrText = isset($_POST['ocr_text']) ? $_POST['ocr_text'] : "你好世界,欢迎使用结巴分词!"; $cleanedText = preg_replace('/[^' . $allowedPattern . ']/u', '', $ocrText); $cleanedText = trim($cleanedText); // 基础分词 $segments = Jieba::cut($cleanedText); try { $topK = 30; // 强制转换为数组(防御库返回非数组情况) $keywordsWithWeights = (array) JiebaAnalyse::extractTags($cleanedText, $topK, true); $combinedKeywords = []; $punctuationSet = str_split($validPunctuations); // 转换为数组便于查找 // 组合词生成(严格排除含标点的词汇) for ($i = 0; $i = 4) { $combinedKeywords[] = $combined; } } // 空数组安全处理(核心修复逻辑) $combinedWeights = []; if (is_array($combinedKeywords) && !empty($combinedKeywords)) { $combinedWeights = array_combine($combinedKeywords, array_fill(0, count($combinedKeywords), 1)); } // 确保两个参数都是数组(PHP 7.2安全合并) $keywordsWithWeights = array_merge($keywordsWithWeights, $combinedWeights); // 关键词过滤(排除纯标点和单字) $keywords = array_filter( array_keys($keywordsWithWeights), function ($kw) use ($punctuationSet) { return mb_strlen($kw, 'UTF-8') >= 2 && !in_array($kw, $punctuationSet); // 排除所有标点符号 } ); } catch (Throwable $e) { http_response_code(500); die(json_encode([ 'status' => 'error', 'message' => '关键词提取失败: ' . $e->getMessage() ])); } $response = [ 'status' => 'success', 'segments' => $segments, 'keywords' => $keywords, 'message' => '分词及关键词提取完成' ]; echo json_encode($response, JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT); 调试输出:Warning: array_merge(): Argument #2 is not an array in /www/wwwroot/1.14.92.218/question/ocr/vendor/fukuball/jieba-php/src/class/JiebaAnalyse.php on line 118
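{ "status": "success", "segments": [ "你好", "世界", ",", "欢迎", "使用", "结巴", "分词", "!" ], "keywords": [ "分词", "结巴", "你好", "欢迎", "使用", "世界", "你好世界", "欢迎使用", "使用结巴", "结巴分词" ], "message": "分词及关键词提取完成" } 解决方案: 将 vendor/fukuball/jieba-php/src/class/JiebaAnalyse.php 第 118 行的 $options = array_merge($defaults, $options); 改为 $options = array_merge($defaults, (array)$options); 原因: 项目太久了,作者当时估计还在用着PHP6;在 PHP 7 以上版本中,对数据类型的精确性要求较高。不过功能上没什么影响,只是看着难受(未验证,纯推测)。补充: 上面的调用 JiebaAnalyse::extractTags($cleanedText, $topK, true) 把布尔值 true 传给了第三个参数,该参数很可能就是随后传入 array_merge() 的 $options,因此它更可能是触发警告的直接原因;调用时改为传入数组(或省略该参数)应当同样可以消除警告(待验证)。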