parsertenders/tgbot/services/test.py
2024-11-03 21:16:44 +07:00

164 lines
7.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import requests
import json
import pandas as pd
import re
from thefuzz import fuzz, process
from bs4 import BeautifulSoup
from openpyxl.cell.cell import ILLEGAL_CHARACTERS_RE
from datetime import datetime
import time
from tgbot.utils.misc.bot_logging import bot_logger
# Cookies passed as ``cookies=`` on the tenderplan.ru requests issued by
# tenders_with_goods().
# NOTE(review): these look like values copied from a browser session. The
# 'jwt' entry is a credential, and its value ends in a literal '...', so it
# may be truncated or redacted -- confirm it is usable, and consider loading
# it from configuration instead of keeping it in source control.
cookies = {
    'jwt': 's%3ABearer%20...',
    'referer': 'https://tenderplan.ru/app?key=0&tender=6639e01152e24fc13574139f',
    'source': 'key=0&tender=6639e01152e24fc13574139f',
    'tildauid': '1713888711831.359844',
    '__ddg1_': 'ZKa7JlUseYuy3cvawO9W',
}
# HTTP headers passed as ``headers=`` on the tenderplan.ru requests issued by
# tenders_with_goods().
# NOTE(review): 'Authorization' carries a hard-coded bearer token. Anyone with
# read access to this file can reuse it; consider moving it to configuration
# or an environment variable and rotating the committed value.
headers = {
    'Accept': '*/*',
    'Authorization': 'Bearer a4dc57cc44d5ca62a06a7b19660840a66f3048028b417bbc813a1acf6f3691da841b9120373431377409359f64430b0644ee22ddf072fbe6ad656b57eeebe83d',
    'Sec-Fetch-Site': 'same-origin',
    'Accept-Language': 'ru',
    'Sec-Fetch-Mode': 'cors',
    'Host': 'tenderplan.ru',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4.1 Safari/605.1.15',
    'Connection': 'keep-alive',
    'Referer': 'https://tenderplan.ru/app?key=0&tender=6639e01152e24fc13574139f',
    'Sec-Fetch-Dest': 'empty',
}
def clean_text(text):
    """Return ``text`` lower-cased with punctuation removed.

    Every character that is neither a word character (``\\w``, which includes
    the underscore) nor whitespace is dropped before lower-casing.
    """
    without_punctuation = re.sub(r'[^\w\s]', '', text)
    return without_punctuation.lower()
def split_search(search_string):
    """Pick the part-number-like tokens out of a goods description.

    The string is split on single spaces. A token is kept when it is at least
    six characters long, contains at least ``len(token) // 4`` digits and has
    no Russian letters.

    Args:
        search_string: Free-text description to scan.

    Returns:
        list[str]: Matching tokens in their original order. The list is empty
        when the whole string contains one digit or fewer.
    """
    total_digits = sum(char.isdigit() for char in search_string)
    if total_digits <= 1:
        return []
    return [
        word
        for word in search_string.split(" ")
        if len(word) >= 6
        and sum(char.isdigit() for char in word) >= len(word) // 4
        # 'ё'/'Ё' sit outside the contiguous а-я / А-Я code point ranges, so
        # they have to be listed explicitly to count as Russian letters.
        and not re.search('[а-яА-ЯёЁ]', word)
    ]
def search_in_autopiter(search):
    """Look up ``search`` on autopiter.ru and return the closest catalog match.

    Calls the ``searchdetails`` endpoint with ``search`` as ``detailNumber``,
    requests prices for the returned catalog ids from ``appraise/getcosts``,
    scores each catalog position with ``fuzz.partial_token_sort_ratio`` against
    ``search`` and keeps the highest-scoring one.

    Args:
        search: Text sent to autopiter as the detail number.

    Returns:
        pandas.DataFrame: always a DataFrame, so callers can iterate it with
        ``.iterrows()`` regardless of the outcome:

        * one row (columns ``ap_search``, ``ap_name``, ``fuzz``, ``ap_number``,
          ``ap_originalPrice``, ``link_autopiter``, ``ap_id``) for the best
          match;
        * one placeholder row filled with ``'----'`` / ``''`` when the search
          produced no catalog positions;
        * an empty DataFrame when the lookup failed (including HTTP 429); the
          error is logged through ``bot_logger``.
    """
    # requests waits indefinitely unless a timeout is given.
    request_timeout = 30
    try:
        resp = requests.get(
            'https://autopiter.ru/api/api/searchdetails',
            params={'detailNumber': search, 'isFullQuery': 'true'},
            timeout=request_timeout,
        )
        if resp.status_code == 429:
            # Rate limited: handled as a failure by the except clause below.
            raise Exception("429")

        catalogs = []
        if resp.status_code == 200:
            # Parse the body once; tolerate a missing/None 'data' or 'catalogs'.
            catalogs = (resp.json().get('data') or {}).get('catalogs') or []

        # id -> first strictly positive originalPrice reported for that id.
        prices = {}
        if catalogs:
            get_cost_resp = requests.get(
                'https://autopiter.ru/api/api/appraise/getcosts',
                params={'idArticles': [position.get('id') for position in catalogs]},
                timeout=request_timeout,
            )
            if get_cost_resp.status_code == 200:
                for cost in get_cost_resp.json().get('data') or []:
                    price = cost.get('originalPrice')
                    if price is not None and price > 0:
                        prices.setdefault(cost.get('id'), price)

        goodauto = []
        for position in catalogs:
            ap_name = position.get('name') or ''
            ap_id = position.get('id')
            ap_number = position.get('number')
            goodauto.append({
                'ap_search': search,
                # Strip characters openpyxl refuses to write into a cell.
                'ap_name': ILLEGAL_CHARACTERS_RE.sub(r'', ap_name),
                'fuzz': fuzz.partial_token_sort_ratio(search, ap_name),
                'ap_number': ap_number,
                'ap_originalPrice': prices.get(ap_id, "--"),
                'link_autopiter': f"https://autopiter.ru/goods/{ap_number}/{position.get('catalogUrl')}/id{ap_id}",
                'ap_id': ap_id,
            })

        if goodauto:
            return pd.DataFrame(goodauto).sort_values(by="fuzz", ascending=False)[:1]

        # Nothing found: a single placeholder row with the same columns as a
        # real match.
        return pd.DataFrame([{
            'ap_search': '----',
            'ap_name': '----',
            'fuzz': '',
            'ap_number': '',
            'ap_originalPrice': '',
            'link_autopiter': '----',
            'ap_id': None,
        }])
    except Exception as e:
        bot_logger.error(f"{e}")
        return pd.DataFrame()
def _iter_autopiter_rows(result):
    """Yield mapping-like rows from a ``search_in_autopiter`` result.

    The result may be a DataFrame or a plain list of dicts; both row kinds
    support ``.get(key)``.
    """
    if isinstance(result, pd.DataFrame):
        for _, row in result.iterrows():
            yield row
    else:
        yield from result or []


def tenders_with_goods(pagecount=1):
    """Collect tenders whose goods look like part numbers and match them on autopiter.

    For each of the first ``pagecount`` pages of the tenderplan.ru tender list,
    every tender is fetched in detail. When its embedded JSON document mentions
    ``"ObjectInfo"``, each goods name is scanned with ``split_search`` and every
    extracted token is looked up with ``search_in_autopiter``. Prices are then
    refreshed with ``get_all_price`` and the rows are written to
    ``tgbot/data/tenders_with_goods.xlsx`` (relative to the working directory).

    Args:
        pagecount: Number of list pages to process, starting at page 0.

    Returns:
        list[dict] | None: One dict per (tender, goods token, autopiter row),
        or ``None`` when any step raised; the error is logged through
        ``bot_logger``.
    """
    # requests waits indefinitely unless a timeout is given.
    request_timeout = 30
    try:
        collected = []
        page = 0
        while page < pagecount:
            listing = requests.get(
                'https://tenderplan.ru/api/tenders/getlist',
                params={'page': page},
                cookies=cookies,
                headers=headers,
                timeout=request_timeout,
            )
            page += 1
            for tend in listing.json().get('tenders') or []:
                tend_name = tend.get('orderName')
                tender_id = tend.get('_id')
                detail = requests.get(
                    'https://tenderplan.ru/api/tenders/get',
                    params={'id': tender_id},
                    cookies=cookies,
                    headers=headers,
                    timeout=request_timeout,
                )
                # The tender document arrives as a JSON string nested in the
                # response body; it can be missing.
                raw_document = detail.json().get('json') or ''
                if "ObjectInfo" not in raw_document:
                    continue
                document = json.loads(raw_document)
                goods = document["0"]["fv"]["0"]["fv"]["tb"]
                # Presumably epoch milliseconds (hence the / 1000); rendered in
                # the machine's local time zone because no tz is passed.
                submission_close_timestamp = int(document['1']['fv']['1']['fv'])
                submission_close_datetime = datetime.fromtimestamp(
                    submission_close_timestamp / 1000
                ).strftime('%Y-%m-%d %H:%M:%S')
                for good in goods.values():
                    good_name = (good.get('0') or {}).get('fv')
                    if not good_name:
                        continue
                    for name in split_search(good_name):
                        for match in _iter_autopiter_rows(search_in_autopiter(name)):
                            collected.append({
                                "tend_name": tend_name,
                                "tend_link": f"https://tenderplan.ru/app?tender={tender_id}",
                                "tend_under": submission_close_datetime,
                                "good_name": good_name,
                                "ap_good_name": name,
                                "ap_search_name": match.get('ap_name'),
                                "ap_search_fuzz": match.get('fuzz'),
                                "ap_search_link": match.get('link_autopiter'),
                                "ap_id": match.get('ap_id'),
                                "ap_search_price": match.get('ap_originalPrice'),
                            })
        collected = get_all_price(collected)
        pd.DataFrame(collected).to_excel('tgbot/data/tenders_with_goods.xlsx')
        return collected
    except Exception as e:
        bot_logger.error(f"{e}")
        return None
def get_all_price(tenders_with_goods):
    """Refresh ``ap_search_price`` on each row from autopiter's ``getcosts`` endpoint.

    All distinct, non-``None`` ``ap_id`` values are sent in one request. While
    the server answers HTTP 429 the request is repeated up to 10 times with a
    5 second pause between attempts. On HTTP 200 each row whose id has a truthy
    ``originalPrice`` in the first matching cost entry gets that price; other
    rows keep their current value.

    Args:
        tenders_with_goods: List of row dicts carrying an ``ap_id`` key; the
            dicts are updated in place.

    Returns:
        list[dict]: The same list object that was passed in.
    """
    costs_url = 'https://autopiter.ru/api/api/appraise/getcosts'
    # requests waits indefinitely unless a timeout is given.
    request_timeout = 30

    # dict.fromkeys de-duplicates while preserving first-seen order.
    ap_ids = list(dict.fromkeys(
        ap.get('ap_id') for ap in tenders_with_goods if ap.get('ap_id') is not None
    ))
    if not ap_ids:
        # Nothing to price: skip the network round trip entirely.
        return tenders_with_goods

    params = {'idArticles': ap_ids}
    get_cost_resp = requests.get(costs_url, params=params, timeout=request_timeout)
    retry_count = 0
    while get_cost_resp.status_code == 429 and retry_count < 10:
        time.sleep(5)
        retry_count += 1
        get_cost_resp = requests.get(costs_url, params=params, timeout=request_timeout)

    if get_cost_resp.status_code == 200:
        # id -> originalPrice of the FIRST cost entry for that id, so each row
        # is a dictionary lookup instead of a scan of the whole cost list.
        first_price = {}
        for cost in get_cost_resp.json().get('data') or []:
            first_price.setdefault(cost.get('id'), cost.get('originalPrice'))
        for ap in tenders_with_goods:
            price = first_price.get(ap.get("ap_id"))
            if price:
                ap['ap_search_price'] = price
    return tenders_with_goods
def beautitext(text):
    """Return the ``str()`` of the list of digit-heavy tokens in ``text``.

    ``text`` is split on single spaces; a token is kept when its digit count is
    at least ``len(token) // 2`` (so empty and one-character tokens always
    qualify).
    """
    kept = []
    for token in text.split(" "):
        digits = sum(1 for symbol in token if symbol.isdigit())
        if digits >= len(token) // 2:
            kept.append(token)
    return str(kept)