xianyufaka/utils/order_history_sync.py
2026-04-15 22:56:44 +08:00

445 lines
16 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import json
import time
from typing import Any, Dict, List, Optional
import aiohttp
from loguru import logger
from utils.order_detail_fetcher import OrderDetailFetcher
from utils.time_utils import parse_db_timestamp, parse_local_datetime_text_to_db_utc
from utils.xianyu_utils import generate_sign, trans_cookies
# Goofish (Xianyu) seller "sold orders" mtop endpoint and its API name.
ORDER_LIST_API_URL = 'https://h5api.m.goofish.com/h5/mtop.taobao.idle.trade.merchant.sold.get/1.0/'
ORDER_LIST_API_NAME = 'mtop.taobao.idle.trade.merchant.sold.get'
# Referer of the seller-console order-management page; sent with every list request.
ORDER_LIST_REFERER = 'https://seller.goofish.com/?site=COMMONPRO#/seller-trade/order-manage'
# Query all order states in a single listing call.
ORDER_LIST_QUERY_CODE = 'ALL'
DEFAULT_PAGE_SIZE = 20
# Timestamp fields, in priority order, used as an order's "anchor" time when
# deciding whether it falls inside a sync window (see
# resolve_order_history_anchor_time / classify_order_history_range).
ORDER_HISTORY_ANCHOR_FIELDS = (
    'platform_paid_at',
    'platform_created_at',
    'platform_completed_at',
)
# Maps raw status values to canonical status keys. Canonical keys map to
# themselves so already-normalized input passes through; Chinese labels are
# the platform's display strings.
ORDER_STATUS_ALIASES = {
    'processing': 'processing',
    'pending_payment': 'pending_payment',
    'pending_ship': 'pending_ship',
    'partial_success': 'partial_success',
    'partial_pending_finalize': 'partial_pending_finalize',
    'shipped': 'shipped',
    'completed': 'completed',
    'refunding': 'refunding',
    'refund_cancelled': 'refund_cancelled',
    'cancelled': 'cancelled',
    'unknown': 'unknown',
    '处理中': 'processing',
    '待付款': 'pending_payment',
    '待发货': 'pending_ship',
    '部分发货': 'partial_success',
    '部分待收尾': 'partial_pending_finalize',
    '已发货': 'shipped',
    '交易成功': 'completed',
    '已完成': 'completed',
    '退款中': 'refunding',
    '退款撤销': 'refund_cancelled',
    '交易关闭': 'cancelled',
    '已关闭': 'cancelled',
}
def normalize_order_history_status(value: Any) -> Optional[str]:
    """Translate a raw order-status value into its canonical status key.

    Tries an exact alias lookup first, then a lowercase lookup; returns
    None for empty input or when no alias matches.
    """
    text = str(value or '').strip()
    if not text:
        return None
    return ORDER_STATUS_ALIASES.get(text) or ORDER_STATUS_ALIASES.get(text.lower())
def normalize_history_amount(value: Any) -> Optional[str]:
    """Normalize a raw amount value to a fixed two-decimal string.

    Strips currency symbols and thousands separators, then formats via
    float. Returns None for empty or unparseable input.

    Fix: the second replace target had been garbled to an empty string
    (''), a no-op; it is restored to the full-width yen sign '￥' so
    amounts like '￥1,234' parse instead of returning None.
    """
    text = str(value or '').strip()
    if not text:
        return None
    # Remove both the half-width '¥' and full-width '￥' yen signs plus
    # thousands commas before parsing.
    cleaned = text.replace('¥', '').replace('￥', '').replace(',', '').strip()
    try:
        return f"{float(cleaned):.2f}"
    except (TypeError, ValueError):
        return None
def resolve_order_history_anchor_time(candidate: Dict[str, Any]) -> Optional[str]:
    """Return the first non-empty anchor timestamp from *candidate*.

    Fields are checked in ORDER_HISTORY_ANCHOR_FIELDS priority order;
    returns None for non-dict input or when every field is blank.
    """
    if not isinstance(candidate, dict):
        return None
    values = (str(candidate.get(field) or '').strip() for field in ORDER_HISTORY_ANCHOR_FIELDS)
    return next((text for text in values if text), None)
def classify_order_history_range(
    anchor_time: Optional[str],
    utc_start: Optional[str] = None,
    utc_end_exclusive: Optional[str] = None,
) -> str:
    """Classify *anchor_time* against the half-open window [utc_start, utc_end_exclusive).

    Returns 'in_range' when no complete window is supplied, 'unknown' when
    any timestamp fails to parse, otherwise 'before', 'after', or 'in_range'.
    """
    if not (utc_start and utc_end_exclusive):
        return 'in_range'
    anchor_dt = parse_db_timestamp(anchor_time) if anchor_time else None
    window = (parse_db_timestamp(utc_start), parse_db_timestamp(utc_end_exclusive))
    if not anchor_dt or not all(window):
        return 'unknown'
    start_dt, end_dt = window
    if anchor_dt >= end_dt:
        return 'after'
    return 'before' if anchor_dt < start_dt else 'in_range'
def _cookie_dict_to_string(cookies_dict: Dict[str, str]) -> str:
return '; '.join(
f'{name}={value}'
for name, value in cookies_dict.items()
if str(name).strip() and value is not None
)
def _extract_set_cookie_updates(response_headers) -> Dict[str, str]:
try:
set_cookie_values = response_headers.getall('Set-Cookie', [])
except Exception:
raw_value = response_headers.get('Set-Cookie')
if isinstance(raw_value, list):
set_cookie_values = raw_value
elif raw_value:
set_cookie_values = [raw_value]
else:
set_cookie_values = []
updates: Dict[str, str] = {}
for cookie in set_cookie_values:
if '=' not in cookie:
continue
try:
name, value = cookie.split(';', 1)[0].split('=', 1)
except ValueError:
continue
updates[name.strip()] = value.strip()
return updates
class OrderHistoryPageFetcher:
    """Fetch the seller's historical order list from the Goofish mtop API.

    Pages through the sold-order listing endpoint, normalizes each entry
    into a flat dict, refreshes runtime cookies from Set-Cookie response
    headers (persisting them best-effort), and retries a page once after a
    cookie refresh when the API reports an auth failure. Order detail
    lookups are delegated to an internal OrderDetailFetcher.

    NOTE(review): several log/error messages end their account prefix with
    '】' but never open with '【' — the opening bracket appears to have been
    stripped in transit; confirm against the original source before "fixing".
    """

    def __init__(self, cookie_string: str, cookie_id_for_log: str = 'unknown', headless: bool = True):
        # Account identifier used in log prefixes and for persisting refreshed cookies.
        self.cookie_id_for_log = cookie_id_for_log or 'unknown'
        self.headless = headless
        self.cookie_string = str(cookie_string or '').strip()
        # Parsed name -> value cookie mapping (empty when no cookie string given).
        self.cookies: Dict[str, str] = trans_cookies(self.cookie_string) if self.cookie_string else {}
        # Delegate used only by fetch_order_detail().
        self.fetcher = OrderDetailFetcher(self.cookie_string, headless=headless, cookie_id_for_log=self.cookie_id_for_log)
        # Lazily created by _ensure_session(); closed in close().
        self.session: Optional[aiohttp.ClientSession] = None

    def _is_auth_failure_ret(self, ret_value: Any) -> bool:
        """Heuristically detect an auth/session failure from the mtop 'ret' value.

        Accepts a string, a list/tuple of ret entries, or anything
        stringifiable; matching is case-insensitive substring search.
        """
        if isinstance(ret_value, str):
            ret_text = ret_value
        elif isinstance(ret_value, (list, tuple)):
            ret_text = ' | '.join([str(item) for item in ret_value])
        else:
            ret_text = str(ret_value or '')
        # Substrings observed in auth-failure responses.
        # NOTE(review): 'FAIL_SYS_TOKEN_EXOIRED' looks misspelled — presumably
        # it matches a misspelled server-side code; confirm before removing.
        # NOTE(review): the bare 'login' keyword is very broad and may
        # misclassify unrelated errors containing that word.
        auth_keywords = (
            '令牌过期',
            'session过期',
            'FAIL_SYS_USER_VALIDATE',
            'FAIL_SYS_TOKEN_EXPIRED',
            'FAIL_SYS_TOKEN_EXOIRED',
            'FAIL_SYS_SESSION_EXPIRED',
            'passport.goofish.com',
            'mini_login',
            'login',
        )
        ret_text_lower = ret_text.lower()
        return any(keyword.lower() in ret_text_lower for keyword in auth_keywords)

    def _set_runtime_cookie_state(self, cookies_dict: Dict[str, str]) -> bool:
        """Replace the in-memory cookie state; return True when it actually changed."""
        # Drop blank names and coerce everything to str before serializing.
        normalized = {str(name): str(value) for name, value in cookies_dict.items() if str(name).strip()}
        new_cookie_string = _cookie_dict_to_string(normalized)
        if new_cookie_string == self.cookie_string:
            return False
        self.cookies = normalized
        self.cookie_string = new_cookie_string
        # Keep the detail fetcher's cookie in sync with ours.
        self.fetcher.cookie = new_cookie_string
        return True

    async def _persist_cookie_update(self) -> None:
        """Best-effort persistence of the refreshed cookie string to the database.

        Skipped when there is no cookie string or no real account id; any
        persistence failure is logged and swallowed.
        """
        if not self.cookie_string or self.cookie_id_for_log == 'unknown':
            return
        try:
            # Imported lazily, presumably to avoid a module-level import cycle.
            from db_manager import db_manager
            db_manager.update_cookie_account_info(self.cookie_id_for_log, cookie_value=self.cookie_string)
        except Exception as exc:
            logger.warning(f"{self.cookie_id_for_log}】保存刷新后的 Cookie 失败: {exc}")

    async def _apply_response_cookie_updates(self, response_headers) -> bool:
        """Merge Set-Cookie updates into runtime state; persist and report changes.

        Returns True when the merged cookies differ from the current state.
        """
        updates = _extract_set_cookie_updates(response_headers)
        if not updates:
            return False
        merged_cookies = dict(self.cookies)
        merged_cookies.update(updates)
        changed = self._set_runtime_cookie_state(merged_cookies)
        if changed:
            await self._persist_cookie_update()
        return changed

    async def _ensure_session(self) -> None:
        """Create the aiohttp session on first use (or after close); 30s total timeout."""
        if self.session and not self.session.closed:
            return
        timeout = aiohttp.ClientTimeout(total=30)
        self.session = aiohttp.ClientSession(timeout=timeout)

    def _build_request_headers(self) -> Dict[str, str]:
        """Request headers mimicking the seller-console web client."""
        return {
            'accept': 'application/json',
            'content-type': 'application/x-www-form-urlencoded',
            'cookie': self.cookie_string,
            'idle_site_biz_code': 'COMMONPRO',
            'idle_user_group_member_id': '',
            'origin': 'https://seller.goofish.com',
            'referer': ORDER_LIST_REFERER,
            'user-agent': (
                'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                'AppleWebKit/537.36 (KHTML, like Gecko) '
                'Chrome/138.0.0.0 Safari/537.36'
            ),
        }

    def _build_request_params(self, data_val: str) -> Dict[str, str]:
        """Build the signed mtop query-string parameters for the list request.

        :param data_val: exact JSON payload text the signature is computed over.
        :raises ValueError: when the cookie jar lacks the `_m_h5_tk` token.
        """
        # The signing token is the part of the _m_h5_tk cookie before the first '_'.
        token = self.cookies.get('_m_h5_tk', '').split('_')[0]
        if not token:
            raise ValueError(f'{self.cookie_id_for_log}】Cookie 缺少 _m_h5_tk无法请求历史订单列表')
        params = {
            'jsv': '2.7.2',
            'appKey': '34839810',
            't': str(int(time.time() * 1000)),
            'sign': '',
            'v': '1.0',
            'type': 'json',
            'accountSite': 'xianyu',
            'dataType': 'json',
            'timeout': '20000',
            'api': ORDER_LIST_API_NAME,
            'valueType': 'string',
            'sessionOption': 'AutoLoginOnly',
            'spm_cnt': 'a21107h.42831410.0.0',
        }
        # Sign over (timestamp, token, payload text).
        params['sign'] = generate_sign(params['t'], token, data_val)
        return params

    async def _request_order_page(self, page_number: int, allow_retry: bool = True) -> Dict[str, Any]:
        """POST one page of the sold-order list and return the parsed JSON.

        Retries the page exactly once when the response looks like an auth
        failure AND the response delivered refreshed cookies.

        :raises RuntimeError: on signing failure, non-JSON response body,
            or an unsuccessful API 'ret' value.
        """
        await self._ensure_session()
        assert self.session is not None
        payload = {
            'pageNumber': page_number,
            'rowsPerPage': DEFAULT_PAGE_SIZE,
            'orderIds': '',
            'queryCode': ORDER_LIST_QUERY_CODE,
            'orderSearchParam': '{}',
        }
        # Compact JSON (no spaces): the signature is computed over this exact text.
        data_val = json.dumps(payload, separators=(',', ':'))
        try:
            params = self._build_request_params(data_val)
        except Exception as exc:
            # Re-raise as RuntimeError so callers handle one exception type.
            raise RuntimeError(str(exc)) from exc
        async with self.session.post(
            ORDER_LIST_API_URL,
            params=params,
            data={'data': data_val},
            headers=self._build_request_headers(),
        ) as response:
            try:
                # content_type=None: parse JSON regardless of the Content-Type header.
                res_json = await response.json(content_type=None)
            except Exception as exc:
                response_text = await response.text()
                raise RuntimeError(
                    f'{self.cookie_id_for_log}】历史订单列表返回非 JSON: status={response.status}, body={response_text[:300]}'
                ) from exc
            # Capture refreshed cookies even when the call itself failed.
            cookies_updated = await self._apply_response_cookie_updates(response.headers)
            ret_value = res_json.get('ret', [])
            if any('SUCCESS::调用成功' in str(ret) for ret in ret_value):
                return res_json
            # Single retry, and only when the cookies actually changed.
            if allow_retry and cookies_updated and self._is_auth_failure_ret(ret_value):
                logger.warning(f"{self.cookie_id_for_log}】历史订单列表鉴权失败Cookie 更新后重试第 {page_number}")
                return await self._request_order_page(page_number, allow_retry=False)
            raise RuntimeError(f"{self.cookie_id_for_log}】历史订单列表 API 调用失败: {ret_value or res_json}")

    def _normalize_order_candidate(self, raw: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """Flatten one raw list entry into the internal order dict.

        Returns None when the entry is not a dict or carries no order id.
        The original payload is preserved under 'raw_source'.
        """
        if not isinstance(raw, dict):
            return None
        # Sub-objects default to {} when absent or of unexpected type.
        common_data = raw.get('commonData') if isinstance(raw.get('commonData'), dict) else {}
        buyer_info = raw.get('buyerInfoVO') if isinstance(raw.get('buyerInfoVO'), dict) else {}
        price_info = raw.get('priceVO') if isinstance(raw.get('priceVO'), dict) else {}
        order_id = str(common_data.get('orderId') or '').strip()
        if not order_id:
            return None
        return {
            'order_id': order_id,
            'item_id': str(common_data.get('itemId') or '').strip() or None,
            # Not provided by the list API.
            'sid': None,
            'buyer_id': str(buyer_info.get('buyerId') or '').strip() or None,
            'buyer_nick': str(buyer_info.get('userNick') or '').strip() or None,
            # Prefer the canonical status key; fall back to the raw text when unmapped.
            'order_status': normalize_order_history_status(common_data.get('orderStatus')) or str(common_data.get('orderStatus') or '').strip() or None,
            # First parseable price field wins.
            'amount': (
                normalize_history_amount(price_info.get('totalPrice')) or
                normalize_history_amount(price_info.get('confirmFee')) or
                normalize_history_amount(price_info.get('auctionPrice'))
            ),
            'platform_created_at': parse_local_datetime_text_to_db_utc(common_data.get('createTime')),
            'platform_paid_at': parse_local_datetime_text_to_db_utc(common_data.get('paySuccessTime')),
            'platform_completed_at': parse_local_datetime_text_to_db_utc(common_data.get('finishTime')),
            'raw_source': raw,
        }

    async def open(self) -> bool:
        """No-op kept for interface compatibility; the API client needs no explicit open."""
        return True

    async def close(self) -> None:
        """Close the HTTP session (if open) and the underlying detail fetcher."""
        if self.session and not self.session.closed:
            await self.session.close()
        self.session = None
        await self.fetcher.close()

    async def fetch_recent_orders(
        self,
        max_orders: int = 100,
        max_scroll_rounds: int = 12,
        utc_start: Optional[str] = None,
        utc_end_exclusive: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Page through the order list, collecting up to *max_orders* orders
        whose anchor time falls inside [utc_start, utc_end_exclusive).

        :param max_orders: cap on captured orders; <= 0 returns an empty result.
        :param max_scroll_rounds: unused; retained for a legacy (scroll-based)
            interface — see the `del` below.
        :param utc_start: inclusive window start; when either bound is missing
            every order counts as in-range.
        :param utc_end_exclusive: exclusive window end.
        :returns: dict with 'orders', 'scanned_count', 'matched_count',
            'out_of_range_count', 'pages_scanned', 'stopped_by_range'.
        """
        # Parameter kept only for backward compatibility with the old scroller.
        del max_scroll_rounds
        if max_orders <= 0:
            return {
                'orders': [],
                'scanned_count': 0,
                'matched_count': 0,
                'out_of_range_count': 0,
                'pages_scanned': 0,
                'stopped_by_range': False,
            }
        await self.open()
        captured_orders: List[Dict[str, Any]] = []
        seen_order_ids = set()  # dedupe across pages
        page_number = 1
        scanned_count = 0
        out_of_range_count = 0
        pages_scanned = 0
        stopped_by_range = False
        while len(captured_orders) < max_orders:
            response_json = await self._request_order_page(page_number)
            pages_scanned += 1
            module = ((response_json.get('data') or {}).get('module') or {})
            items = module.get('items') or []
            # nextPage arrives as a string flag, not a bool.
            next_page = str(module.get('nextPage') or '').lower() == 'true'
            total_count = str(module.get('totalCount') or '').strip() or 'unknown'
            if not isinstance(items, list):
                items = []
            # Per-page counters for the summary log and the early-stop check.
            page_scanned_count = 0
            page_in_range_count = 0
            page_before_count = 0
            page_after_count = 0
            page_unknown_count = 0
            for raw_item in items:
                candidate = self._normalize_order_candidate(raw_item)
                if not candidate:
                    continue
                order_id = candidate.get('order_id')
                if not order_id or order_id in seen_order_ids:
                    continue
                seen_order_ids.add(order_id)
                scanned_count += 1
                page_scanned_count += 1
                anchor_time = resolve_order_history_anchor_time(candidate)
                range_status = classify_order_history_range(anchor_time, utc_start=utc_start, utc_end_exclusive=utc_end_exclusive)
                if range_status == 'in_range':
                    captured_orders.append(candidate)
                    page_in_range_count += 1
                else:
                    out_of_range_count += 1
                    if range_status == 'before':
                        page_before_count += 1
                    elif range_status == 'after':
                        page_after_count += 1
                    else:
                        page_unknown_count += 1
                if len(captured_orders) >= max_orders:
                    break
            logger.info(
                f"{self.cookie_id_for_log}】历史订单列表第 {page_number} 页抓取完成: "
                f"page_items={len(items)}, scanned={page_scanned_count}, in_range={page_in_range_count}, "
                f"before_range={page_before_count}, after_range={page_after_count}, unknown_anchor={page_unknown_count}, "
                f"captured={len(captured_orders)}, totalCount={total_count}, nextPage={next_page}"
            )
            if len(captured_orders) >= max_orders or not next_page or not items:
                break
            # The listing is assumed newest-first, so a page consisting
            # entirely of before-window orders means later pages are older
            # still — stop paging. (NOTE(review): ordering assumption; confirm.)
            if (
                utc_start and utc_end_exclusive
                and page_scanned_count > 0
                and page_in_range_count == 0
                and page_unknown_count == 0
                and page_before_count == page_scanned_count
            ):
                stopped_by_range = True
                logger.info(
                    f"{self.cookie_id_for_log}】历史订单列表在第 {page_number} 页已全部早于开始时间,停止继续翻页"
                )
                break
            page_number += 1
        matched_orders = captured_orders[:max_orders]
        return {
            'orders': matched_orders,
            'scanned_count': scanned_count,
            'matched_count': len(matched_orders),
            'out_of_range_count': out_of_range_count,
            'pages_scanned': pages_scanned,
            'stopped_by_range': stopped_by_range,
        }

    async def fetch_order_detail(self, order_id: str, force_refresh: bool = True) -> Optional[Dict[str, Any]]:
        """Delegate a single order-detail lookup to the internal OrderDetailFetcher."""
        return await self.fetcher.fetch_order_detail(order_id, force_refresh=force_refresh)