...是1.1.0起新加的功能,所以1.0.x的版本无此bug) #define SCWS_WORD_MALLOCED 0x100 只要将 0x100 改为 0x80 即可,因为 struct scws_word 中定义的 flag 为 unsigned char 即 8bit, 容不下 0x100,导致所有的 ->flag |= SCWS_WORD_MALLOCED 失效,以至于在 scws.c 中...
...e (res = cur = scws_get_result(s)) { while (cur != NULL) { printf("Word: %.*s/%s (IDF = %4.2f)\n", cur->len, text+cur->off, cur->attr, cur->idf); cur = cur->next; } scws_free_result(res); } scws_free(s); } [/code] 执行结果 [root@namenode-1 scws]# ./a.out Word: 上海市/ns...
...索引库,转换出来的内容呢[hr] $xs->search->setDb('products'); $words3 = $xs->search->getExpandedQuery($searchText); print_r($words3); //这样也不行
... 'no'; } $this -> so->set_rule('rules.utf8.ini'); } function word_list($text) { $time_start = $this -> get_microtime(); $this -> so->send_text($text); while ($temp = $this -> so->get_result()) { foreach ($temp as $key=>$val) { $list[$val['word']] = $val['word'];...
...rules.utf8.ini'); $pscws->set_ignore(true); $pscws->send_text($title); $words = $pscws->get_tops(10); $tags = array(); foreach ($words as $val) { $tags[] = $val['word']; } $pscws->close(); return $tags; } ------------------------------------ $title=并遵守中华人民共和国有关法...
...$cws->set_rule('pscws4/etc/rules.utf8.ini'); $cws->send_text($title); $words = $cws->get_tops(10); $cws->close(); $tags = array(); foreach ($words as $val) { $tags[] = $val['word']; } return implode(',', $tags); } print_r(get_tags_arr($text)); 像这个代码,最终...
...的生态效应及生态系统健康评价研究项目的汇交数据'; $words = $tokenizer->getResult($text); print_r($words);
...。 譬如: $so = scws_new(); $so->set_charset('utf8'); $so->set_dict('words.txt',SCWS_XDICT_TXT); $so->set_ignore(true); $so->send_text("GMhello指导员"); echo ''; while ($tmp = $so->get_result()) { var_dump($tmp); } $so->close(); 在words.txt只定义如下内容 GM 1.0 1.0 * hello指导员 ...
function get_tfidf($word, $count) { if ($count < 1000) $count = 21000 - $count * 18; $tf = log($count); $tf = pow($tf, 5) * log(strlen($word)); $tf = log($tf); $idf = log(5000000000/$count); //if ($tf > 13) $idf *= 1.4; return array($...