버그 수정 및 해커 그룹 분리 코드 추가
This commit is contained in:
28
crawler.py
28
crawler.py
@ -444,6 +444,29 @@ def find_skill_uses_for_validation(
|
||||
return skill_use_set
|
||||
|
||||
|
||||
def collapse_duplicated(data):
    """Collapse duplicated records into one record per group.

    Records are grouped by the key ``(uid, @timestamp, body.xyz,
    body.bef_xyz)``.  For every group, a shallow copy of the first record
    seen is emitted with an extra ``"duplicated"`` entry holding the number
    of records that shared that key.

    Missing fields do not abort the batch: an absent ``uid``,
    ``@timestamp``, ``body``, ``xyz`` or ``bef_xyz`` is treated as ``None``
    when building the key.

    Args:
        data: Iterable of dict records.  It is consumed exactly once, so a
            generator is accepted.

    Returns:
        A list of dicts in first-seen group order.  The input records are
        not modified (the copies are shallow, so nested values such as
        ``body`` are still shared with the originals).
    """

    def freeze(value):
        # Turn (possibly nested) sequences into tuples so the grouping key is
        # hashable.  Strings are left alone: iterating them would recurse on
        # one-character strings forever.
        if value is None or isinstance(value, (str, bytes)):
            return value
        try:
            return tuple(freeze(part) for part in value)
        except TypeError:
            # Not iterable (e.g. a number): already usable as a key part.
            return value

    def make_key(item):
        body = item.get("body") or {}
        return (
            item.get("uid"),
            item.get("@timestamp"),
            freeze(body.get("xyz")),
            freeze(body.get("bef_xyz")),
        )

    # dict preserves insertion order, so groups come out in first-seen order.
    # Only the first record of each group is kept; the rest are just counted.
    collapsed = {}
    for record in data:
        key = make_key(record)
        entry = collapsed.get(key)
        if entry is None:
            entry = record.copy()
            entry["duplicated"] = 0
            collapsed[key] = entry
        entry["duplicated"] += 1

    return list(collapsed.values())
|
||||
|
||||
|
||||
# =========================
|
||||
# 2) 저장 전용 (분석 없음)
|
||||
# =========================
|
||||
@ -469,10 +492,11 @@ def save_verified_batches_to_csv(
|
||||
for verified_hack_logs in verified_batches_iter:
|
||||
summary_data = defaultdict(lambda: defaultdict(int))
|
||||
|
||||
for log in verified_hack_logs:
|
||||
collapsed = collapse_duplicated(verified_hack_logs)
|
||||
for log in collapsed:
|
||||
uid = log.get('uid')
|
||||
if uid:
|
||||
hack_type = len(log)
|
||||
hack_type = log.get('duplicated')
|
||||
summary_data[uid][hack_type] += 1
|
||||
|
||||
uids_to_lookup = {log['uid'] for log in verified_hack_logs if 'uid' in log}
|
||||
|
||||
Reference in New Issue
Block a user