
2021-09-17 Group Meeting

^ _ ^

From 2021-09-11 to 2021-09-17
Fixed bugs in the project (cj_text_analyze)

Overview

9.11

Score Comparison

After a token goes through the FSM (finite-state machine), the process determines its pattern type. There is a one-to-many relationship between morphological (pattern) types and semantic types.
The features that each semantic type should satisfy are defined in token_config.py. Whenever a token matches a feature belonging to a semantic type, its score for that semantic type is increased.

Once the semantic scores are assigned, the process normalizes them to [0, 1]. I call the normalized result the weight, which represents the probability that the token carries that semantic.
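As a rough sketch of this scoring-and-normalization step (the feature table, regexes, and helper functions below are hypothetical illustrations rather than the real token_config.py; only the idea of per-semantic scores and [0, 1] weights comes from the project):

import re

# Hypothetical stand-in for the per-semantic feature definitions in token_config.py:
# each semantic type lists (regex, score) pairs.
SEMANTIC_FEATURES = {
    "Bond_Money": [(re.compile(r"\d+万"), 2), (re.compile(r"元$"), 1)],
    "Product":    [(re.compile(r"\d{2}国开\d{2}"), 3)],
}

def score_token(value):
    # Every matched feature adds its score to that semantic type.
    return {
        semantic: sum(score for pattern, score in features if pattern.search(value))
        for semantic, features in SEMANTIC_FEATURES.items()
    }

def normalize(scores):
    # Normalize raw scores into [0, 1] weights (max scaling is an assumption).
    top = max(scores.values()) or 1
    return {semantic: score / top for semantic, score in scores.items()}

scores = score_token("1000万元")   # {"Bond_Money": 3, "Product": 0}
weights = normalize(scores)        # {"Bond_Money": 1.0, "Product": 0.0}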

In some situations this approach makes mistakes, because different pattern types can map to the same semantic type. Two tokens may then end up with the same weight even though their raw scores differ. Usually we want the token with the higher score to replace the lower-scoring one that was placed on the cloth_chess board first.

# When a board cell is already occupied, compare the occupying token with the current one
# on both weight and raw score before deciding which to keep.
occupy_token_weight = occupy_token.get_weight_by_semantic(semantic)
occupy_token_score = occupy_token.get_score_by_semantic(semantic)
token_weight = token.get_weight_by_semantic(semantic)
token_score = token.get_score_by_semantic(semantic)
if occupy_token_weight >= token_weight and occupy_token_score >= token_score:
    logger.info(f"占领token【{occupy_token.value}】语义权重【{occupy_token_weight}】>= 当前token【{token.value}】语义权重【{token_weight}】;尝试不摆放{token}")
    logger.info(f"占领token【{occupy_token.value}】语义得分【{occupy_token_score}】>= 当前token【{token.value}】语义得分【{token_score}】;尝试不摆放{token}")
    next_state = copy.deepcopy(cur_state)
    next_state.discard_token(token)
    logger.info(f"新的元素棋盘放入队列:{next_state}")
    state_queue.put([-next_state.get_score(), next_state])
else:
    # The current token wins on score: place it, replacing the occupying token.
    logger.info(f"摆放【{token.value}】替换【{occupy_token.value}】")
    next_state = copy.deepcopy(cur_state)
    next_state.pos_token(index, token)
    logger.info(f"新的元素棋盘放入队列:{next_state}")
    state_queue.put([-next_state.get_score(), next_state])  # assumed: the log line implies the new board is enqueued here as well

Date Fix

Float Bond Money Situation

9.12

9.13 - 9.15

'''
@File    : merge_strategy.py
@Time    : 2021/09/14 07:26:21
@Author  : LuckyQ
@Version : 1.0
@Desc    : Token merge strategies
'''

from token_evaluate import has_semantic

from cj_token import Token


class MergeStrategy(object):
    def merge(self, token_list):
        pass


# Deal-keyword merge strategy: merges the (2|4|6|8) tokens that form
# "keyword1: xx, keyword2: xx" pairs.
class DealKeywordMerge(MergeStrategy):
    keyword_list = ["买", "卖"]
    obj_semantic = ["Deal_Company", "Real_Deal_Op_Name"]

    def merge(self, token_list):
        res_list = []
        i, n = 0, len(token_list)
        while i < n:
            token = token_list[i]
            token_value = token.value
            if not self._is_contain_keyword(token_value):
                i += 1
                res_list.append(token)
                continue
            flag = False
            for semantic in self.obj_semantic:
                if has_semantic(token, semantic):
                    flag = True
                    break
            if not flag:
                i += 1
                res_list.append(token)
                continue
            # Starting from i, scan forward in steps of two to collect the keyword group to merge.
            start = i
            assert i + 1 < n
            merge_value_list = [token_list[i].value, token_list[i+1].value]
            i += 2
            while i + 1 < n:
                token = token_list[i]
                token_value = token.value
                if not self._is_contain_keyword(token_value):
                    break
                flag = False
                for semantic in self.obj_semantic:
                    if has_semantic(token, semantic):
                        flag = True
                        break
                if not flag:
                    break

                merge_value_list.extend([token_list[i].value, token_list[i+1].value])
                i += 2

            new_token = Token(value=merge_value_list, pattern_type="Keyword_Compose")
            new_token.row = token_list[start].row
            res_list.append(new_token)
            # Stop after the first such keyword group has been found.
            break
        while i < n:
            res_list.append(token_list[i])
            i += 1
        return res_list

    # Whether the string contains any keyword from keyword_list.
    def _is_contain_keyword(self, s):
        for keyword in self.keyword_list:
            if keyword in s:
                return True
        return False


# Merge strategy for runs of same-shaped elements (a Product token followed by a Bond_Money token).
class SemanticSimilaryMerge(MergeStrategy):
    semantic_list = ["Product", "Bond_Money"]

    def merge(self, token_list):
        res_list = []
        i, n = 0, len(token_list)
        start, end = -1, -1
        while i < n - 1:
            token, next_token = token_list[i], token_list[i+1]
            if not self._is_semantic_seq(token, next_token):
                res_list.append(token)
                i += 1
                continue
            start = i
            value_list = [token.value, next_token.value]
            i += 2
            while i < n - 1:
                token, next_token = token_list[i], token_list[i+1]
                if not self._is_semantic_seq(token, next_token):
                    break
                value_list.extend([token.value, next_token.value])
                i += 2

            new_token = Token(value=value_list, pattern_type="Element_Compose")
            new_token.row = token_list[start].row
            res_list.append(new_token)
            break
        while i < n:
            res_list.append(token_list[i])
            i += 1
        return res_list

    def _is_semantic_seq(self, token, next_token):
        return has_semantic(token, self.semantic_list[0]) and has_semantic(next_token, self.semantic_list[1])


# Merge strategy for synonyms and supplementary words.
class SupplyMerge(MergeStrategy):
    sequence_list = [["Real_Deal_Op_Name", "Deal_Company"], ["Real_Deal_Op_Name", "Op_Dealer_No"],
                     ["Deal_Company", "Dealer_No"]]

    def merge(self, token_list):
        res_list = []
        i, n = 0, len(token_list)
        while i < n:
            match_seq = False
            for sequence in self.sequence_list:
                if i + len(sequence) - 1 >= n:
                    continue
                flag = True
                for j in range(0, len(sequence)):
                    if not has_semantic(token_list[i+j], sequence[j]):
                        flag = False
                        break
                # If every position matches the semantic sequence, merge those tokens.
                if flag:
                    value_list = [token_list[j].value for j in range(i, i + len(sequence))]
                    new_token = Token(value=value_list, pattern_type="Similary_Compose")
                    new_token.row = token_list[i].row
                    res_list.append(new_token)
                    i += len(sequence)
                    match_seq = True
                    break
            if not match_seq:
                res_list.append(token_list[i])
                i += 1
        return res_list
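A hypothetical usage sketch for these strategies (the chaining order and the driver function are assumptions for illustration; only the class names and the merge(token_list) interface come from merge_strategy.py):

from merge_strategy import DealKeywordMerge, SemanticSimilaryMerge, SupplyMerge

# Apply each merge strategy in turn; every pass returns a new, possibly shorter token list.
strategies = [DealKeywordMerge(), SemanticSimilaryMerge(), SupplyMerge()]

def merge_all(token_list):
    for strategy in strategies:
        token_list = strategy.merge(token_list)
    return token_list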

9.16

# If two adjacent rows of the current board differ too much, merge the next row into this one.
fix_cloth_chess = []
fix_orders = []
# Merge adjacent rows whose order intersection contains at most max_dis elements.
max_dis = 2
i = 0
while i < len(self.cloth_chess):
    row = self.cloth_chess[i]
    if i == len(self.cloth_chess) - 1:
        fix_cloth_chess.append(row)
        fix_orders.append(self.orders[i])
        break
    cur_element_list = self.orders[i]
    next_element_list = self.orders[i+1]
    element_dis = len(set(cur_element_list).intersection(set(next_element_list)))
    if element_dis <= max_dis:
        # Merge the order lists, keeping the current row's order first.
        fix_order = copy.deepcopy(cur_element_list)
        for ele in next_element_list:
            if ele not in fix_order:
                fix_order.append(ele)
        # Fill empty cells of the current row with the next row's cells.
        new_row = copy.deepcopy(row)
        for j, col in enumerate(self.cloth_chess[i+1]):
            if col is not None and row[j] is None:
                new_row[j] = copy.deepcopy(col)
        fix_cloth_chess.append(new_row)
        fix_orders.append(fix_order)
        i += 2
    else:
        fix_cloth_chess.append(row)
        fix_orders.append(self.orders[i])
        i += 1
self.cloth_chess = fix_cloth_chess
self.orders = fix_orders
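To make the effect concrete, here is a toy before/after example of this pass (the data shapes are inferred from the code: orders holds the element names per row, cloth_chess holds the placed cells, simplified to plain strings or None; the concrete values are hypothetical):

# Two adjacent rows whose orders share only "Product" (intersection size 1 <= max_dis 2),
# so they are merged into a single row.
orders      = [["Product", "Bond_Money"], ["Product", "Deal_Company"]]
cloth_chess = [["21国开10", "1000万", None], [None, None, "XX银行"]]

# After the merge pass:
# orders      == [["Product", "Bond_Money", "Deal_Company"]]
# cloth_chess == [["21国开10", "1000万", "XX银行"]]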