// Source page: User:A2569875-bot/Code/CreateCasRedirect.cpp
// (wiki page chrome captured with the listing: 外观)
#using <System.Xml.dll>
#using <System.dll>
#include <iostream>
#include <string>
#include <vector>
#include <chrono>
#include <regex>
using namespace System::Threading;
using namespace DotNetWikiBot;
#include "CreateCasRedirect.h"
#include "BotHeader.h"
// For privacy reasons, some local header files are omitted from this listing.
#include <random>
// ---- Shared bot state -------------------------------------------------------
// These are plain (non-atomic) globals that are read and written from the
// worker thread (run_tesk), the keyboard thread (waitkey) and main().

// Pause flag: toggled by waitkey() when 'p' is read; while true, run_tesk()
// skips its processing branch.
bool should_stop = false;
// Quit flag: toggled by waitkey() when 'q' is read; main() stops relaunching
// the worker thread once it observes this as true.
bool should_close = false;
// Set when a redirect was saved (or when run_tesk() had to refill an empty
// list); run_tesk() then sleeps for the configured frequency and clears it.
bool created_page = false;
// Number of redirects saved so far; incremented by create_cas_redirect().
int added_count = 0;
// Upper bound on redirects; only referenced by a commented-out check in
// run_tesk() in the visible code.
const int max_added = 150;
// Not referenced anywhere in the visible code.
std::wstring output_msg = std::wstring();
// Seed source and engine used by run_tesk() for its random delays and for
// picking the next page index.
std::random_device rd;
std::default_random_engine gen = std::default_random_engine(rd());
// Index into the candidate page list chosen by run_tesk() and consumed by
// create_cas_redirect().
int load_id = 0;
/// Checks whether a string is a well-formed CAS Registry Number with a
/// correct check digit.
///
/// A CAS number has the shape "A-BB-C": A is 2 to 7 decimal digits, BB is
/// exactly two digits and C is the single check digit. The check digit equals
/// the sum of the digits of A and BB, each multiplied by its 1-based position
/// counted from the right, taken modulo 10.
///
/// @param cas_test Candidate string, e.g. "7732-18-5".
/// @return true only if the format is valid and the check digit matches.
bool check_cas_no(std::string cas_test) {
    // Locate exactly two dashes; anything else is not a CAS number.
    const std::size_t first_dash = cas_test.find('-');
    if (first_dash == std::string::npos) return false;
    const std::size_t second_dash = cas_test.find('-', first_dash + 1);
    if (second_dash == std::string::npos) return false;
    if (cas_test.find('-', second_dash + 1) != std::string::npos) return false;

    const std::string head = cas_test.substr(0, first_dash);
    const std::string middle = cas_test.substr(first_dash + 1, second_dash - first_dash - 1);
    const std::string check = cas_test.substr(second_dash + 1);

    // Enforce the segment lengths so that e.g. "773-218-5" is rejected even
    // though its digits would produce a matching checksum.
    if (head.size() < 2 || head.size() > 7) return false;
    if (middle.size() != 2 || check.size() != 1) return false;

    const char check_char = check[0];
    if (check_char < '0' || check_char > '9') return false;

    const std::string digits = head + middle;
    int check_sum = 0;
    for (std::size_t i = 0; i < digits.size(); ++i) {
        const char digit_char = digits[i];
        if (digit_char < '0' || digit_char > '9') return false;
        // The leftmost digit carries the largest weight, the rightmost weight 1.
        const int weight = static_cast<int>(digits.size() - i);
        check_sum += weight * (digit_char - '0');
    }
    return check_sum % 10 == check_char - '0';
}
/// Loads a wiki page and returns the first CAS number found in a CAS-style
/// template field ("CASNo = ...", "CAS號 = ...", "CAS number = ...", ...).
///
/// The CAS number is NOT checksum-validated here.
///
/// @param zhWiki        Site the page is loaded from.
/// @param the_page_name Title of the page to inspect.
/// @return "<cas> ([[<the_page_name>]])" for the first field containing a
///         CAS-shaped value, or an empty string when none is found.
System::String^ get_page_cas_no(Site^ zhWiki,System::String^ the_page_name) {
    Page^ the_page = gcnew Page(zhWiki, the_page_name);
    // Matches "<field name> = <cas number>". Fields named "CASOther" are
    // deliberately not matched (they appear not to have been proofread yet).
    System::Text::RegularExpressions::Regex^ cas_field_regex = gcnew System::Text::RegularExpressions::
        Regex("([Cc][Aa][Ss][Nn][Oo][0-9]*|[Cc][Aa][Ss]([Nn][Oo][Ss])?|[Cc][Aa][Ss][號\\u53f7]|[Cc][Aa][Ss][ _]?[Nn]umber[ _]?[0-9]*)[ \\t\\n\\r]*\\=[ \\t\\n\\r]*[1-9]\\d{1,6}-\\d\\d-\\d");
    // Extracts the bare number from a matched field; compiled once rather
    // than once per match.
    const std::regex cas_regex("[0-9]+-[0-9][0-9]-[0-9]");

    the_page->Load();
    System::Text::RegularExpressions::MatchCollection^ matches = cas_field_regex->Matches(the_page->text);
    for each (System::Text::RegularExpressions::Match^ match in matches)
    {
        std::string match_string;
        if (match->Success) {
            MarshalString(match->Value, match_string);
        }
        std::cout << match_string << std::endl;
        if (match_string.find('=') == std::string::npos) {
            continue;
        }
        std::smatch cas_match;
        if (std::regex_search(match_string, cas_match, cas_regex)) {
            // The pattern has no capture groups, so the whole match is the number.
            const std::string the_casno = cas_match.str();
            return gcnew System::String(the_casno.c_str()) + " ([[" + the_page_name + "]])";
        }
    }
    return "";
}
/// Program entry point.
///
/// Logs in, builds the list of candidate pages (members of
/// "無CAS號重定向的物質條目" that are in neither blacklist category), then
/// repeatedly runs run_tesk() on a worker thread until should_close is set.
/// A second thread runs waitkey() to read keyboard commands.
int main() {
    Site^ zhWiki;
    zhWiki = login(zhWiki);
    System::Collections::Generic::List<System::String^>^ page_result_pre = zhWiki->getPageNamesFromCategory("無CAS號重定向的物質條目", 5000);
    System::Collections::Generic::List<System::String^>^ page_black_list = zhWiki->getPageNamesFromCategory("未提供參考文獻的CAS號", 5000);
    System::Collections::Generic::List<System::String^>^ page_black_list2 = zhWiki->getPageNamesFromCategory("含有未校對CAS號的條目", 5000);
    System::Collections::Generic::List<System::String^>^ page_result = gcnew System::Collections::Generic::List<System::String^>();

    // Seed the C runtime generator from the current time.
    unsigned seed = (unsigned)time(NULL);
    srand(seed);

    // Keep only the pages that appear in neither blacklist (case-insensitive).
    for each (System::String^ page_name_it in page_result_pre) {
        bool can_edit_red = true;
        for each (System::String^ page_name_itB in page_black_list) {
            if (System::String::Compare(page_name_it, page_name_itB, System::StringComparison::OrdinalIgnoreCase) == 0) {
                std::cout << "Page " << page_name_itB << " is in the Category:未提供參考文獻的CAS號!!" << std::endl;
                can_edit_red = false;
                break;
            }
        }
        if (can_edit_red) {
            for each (System::String^ page_name_itB in page_black_list2) {
                if (System::String::Compare(page_name_it, page_name_itB, System::StringComparison::OrdinalIgnoreCase) == 0) {
                    std::cout << "Page " << page_name_itB << " is in the Category:含有未校對CAS號的條目!!" << std::endl;
                    can_edit_red = false;
                    break;
                }
            }
        }
        if (can_edit_red) {
            page_result->Add(page_name_it);
        }
    }

    std::cout << "found page :";
    for each (System::String^ page_name_it in page_result) std::cout << page_name_it << ",";
    std::cout << std::endl;

    // run_tesk() receives the site and the candidate list packed in one array.
    cli::array<System::Object^>^ array_data = {
        (System::Object^)zhWiki,
        (System::Object^)page_result
    };

    // Reset the counter before the worker starts so this write cannot
    // overlap with the worker's increments.
    added_count = 0;

    Thread^ do_by_time = gcnew Thread(gcnew ParameterizedThreadStart(run_tesk));
    Thread^ wait_the_key = gcnew Thread(gcnew ThreadStart(waitkey));
    do_by_time->Start(array_data);
    wait_the_key->Start();

    // Run one task at a time: wait for the worker to finish, then start a
    // fresh one unless a quit was requested.
    while (1) {
        do_by_time->Join();
        while (do_by_time->ThreadState != ThreadState::Stopped) do_by_time->Join();
        do_by_time = gcnew Thread(gcnew ParameterizedThreadStart(run_tesk));
        if (!should_close)do_by_time->Start(array_data);
        else break;
    }
    return 0;
}
/// Collects up to `count` trailing lines of `text`.
///
/// The text is scanned from the end (RightToLeft) with the multiline pattern
/// "^.*$"; every match is inserted at the front of the result, so the
/// returned list keeps the lines in their original top-to-bottom order.
///
/// @param text  Text to scan.
/// @param count Maximum number of matches to collect.
/// @return A new list holding at most `count` matched lines.
System::Collections::Generic::List<System::String^>^ TakeLastLines(System::String^ text, int count)
{
    System::Collections::Generic::List<System::String^>^ collected =
        gcnew System::Collections::Generic::List<System::String^>();

    for (System::Text::RegularExpressions::Match^ current =
             System::Text::RegularExpressions::Regex::Match(text, "^.*$",
                 System::Text::RegularExpressions::RegexOptions::Multiline |
                 System::Text::RegularExpressions::RegexOptions::RightToLeft);
         current->Success && collected->Count < count;
         current = current->NextMatch())
    {
        // Matches arrive last-line-first; prepending restores document order.
        collected->Insert(0, current->Value);
    }
    return collected;
}
/// Reads the task interval from the wiki page "User:A2569875-bot/Frequency".
///
/// The page text is parsed as a 32-bit integer. If parsing throws, the
/// exception is printed and the default of 60000 is returned. The caller in
/// this file passes the result to Thread::Sleep.
///
/// @param zhWiki Site the configuration page is loaded from.
/// @return The parsed interval, or 60000 when the page text is not an integer.
int get_frequency(Site^ zhWiki) {
    const int fallback_interval = 60000;

    Page^ frequency_page = gcnew Page(zhWiki, "User:A2569875-bot/Frequency");
    frequency_page->Load();

    try
    {
        return System::Int32::Parse(frequency_page->text);
    }
    catch (System::Exception^ parse_error)
    {
        std::cout << parse_error->ToString() << std::endl;
    }
    return fallback_interval;
}
/// Worker-thread body: performs one iteration of the redirect task.
///
/// Steps:
///  1. Calls check_command(); an exception there counts as "task disabled".
///  2. Unless paused (should_stop), either processes one randomly chosen page
///     from the candidate list via create_cas_redirect(), or, when the list
///     is empty, refills it from the wiki categories.
///  3. If created_page is set, sleeps for get_frequency() (60000 when the
///     configured value is not positive) and clears the flag.
///
/// NOTE(review): while paused with created_page == false this function
/// returns without sleeping and main() relaunches it at once, so
/// check_command() appears to run back-to-back — confirm whether a delay is
/// wanted there.
///
/// @param inputobj cli::array<Object^> holding { Site^, List<String^>^ }.
void run_tesk(System::Object^ inputobj)
{
    //if(added_count >= max_added)should_stop = true;
    cli::array<System::Object^>^ array_data = safe_cast<cli::array<System::Object^>^>(inputobj);
    Site^ zhWiki = safe_cast<Site^>(array_data[0]);
    System::Collections::Generic::List<System::String^>^ page_result =
        safe_cast<System::Collections::Generic::List<System::String^>^>(array_data[1]);

    std::cout << "\ntask active!" << std::endl;

    bool check_result = true;
    try
    {
        check_result = check_command(zhWiki, page_result);
    } catch (System::Exception^ ex1) {
        check_result = false;
        std::cout << "\n===========\nError!!\n" << std::endl;
        std::cout << ex1->Message << std::endl;
        std::cout << ex1->StackTrace << std::endl;
    } catch (const std::exception& ex2) {
        check_result = false;
        std::cout << "\n===========\nError!!\n" << std::endl;
        std::cout << ex2.what() << std::endl;
    }

    if (!should_stop) {
        if (page_result->Count > 0) {
            if (check_result) {
                // Random back-off used after a failed attempt. The upper bound
                // is itself random in [500, 10000]; clamp it to the lower
                // bound because uniform_int_distribution requires a <= b.
                std::uniform_int_distribution<int> dis0(500, 10000);
                const int backoff_ceiling = dis0(gen);
                std::uniform_int_distribution<int> dis(3000, backoff_ceiling < 3000 ? 3000 : backoff_ceiling);

                // Pick the page to process; the bounds are inclusive, so use
                // Count - 1 to keep every index equally likely.
                std::uniform_int_distribution<int> dis2(0, page_result->Count - 1);
                load_id = dis2(gen);

                std::cout << "prepare to create " << page_result[load_id] << std::endl;
                try
                {
                    create_cas_redirect(zhWiki, page_result);
                }
                catch (System::Exception^ ex1)
                {
                    std::cout << "\n===========\nError!!\n" << std::endl;
                    std::cout << ex1->Message << std::endl;
                    std::cout << ex1->StackTrace << std::endl;
                    Thread::Sleep(dis(gen));
                }
                catch (const std::exception& ex2)
                {
                    std::cout << "\n===========\nError!!\n" << std::endl;
                    std::cout << ex2.what() << std::endl;
                    Thread::Sleep(dis(gen));
                }
            }
            else {
                std::cout << "tesk Disable" << std::endl;
                Thread::Sleep(30000);
            }
        }
        else {
            // Nothing left to process: force the frequency sleep below and
            // rebuild the candidate list from the categories.
            created_page = true;
            System::Collections::Generic::List<System::String^>^ page_result_pre = zhWiki->getPageNamesFromCategory("無CAS號重定向的物質條目", 5000);
            System::Collections::Generic::List<System::String^>^ page_black_list = zhWiki->getPageNamesFromCategory("未提供參考文獻的CAS號", 5000);
            System::Collections::Generic::List<System::String^>^ page_black_list2 = zhWiki->getPageNamesFromCategory("含有未校對CAS號的條目", 5000);
            for each (System::String^ page_name_it in page_result_pre) {
                // Skip pages that appear in either blacklist (case-insensitive).
                bool can_edit_red = true;
                for each (System::String^ page_name_itB in page_black_list) {
                    if (System::String::Compare(page_name_it, page_name_itB, System::StringComparison::OrdinalIgnoreCase) == 0) {
                        std::cout << "Page " << page_name_itB << " is in the Category:未提供參考文獻的CAS號!!" << std::endl;
                        can_edit_red = false;
                        break;
                    }
                }
                if (can_edit_red) {
                    for each (System::String^ page_name_itB in page_black_list2) {
                        if (System::String::Compare(page_name_it, page_name_itB, System::StringComparison::OrdinalIgnoreCase) == 0) {
                            std::cout << "Page " << page_name_itB << " is in the Category:含有未校對CAS號的條目!!" << std::endl;
                            can_edit_red = false;
                            break;
                        }
                    }
                }
                if (can_edit_red) {
                    page_result->Add(page_name_it);
                }
            }
            std::cout << "found page :";
            for each (System::String^ page_name_it in page_result) std::cout << page_name_it << ",";
            std::cout << std::endl;
        }
    }

    if (created_page) {
        int frequency = get_frequency(zhWiki);
        if (frequency <= 0)frequency = 60000;
        created_page = false;
        Thread::Sleep(frequency);
    }
}
/// Keyboard-thread body: reads characters from stdin and toggles the control
/// flags.
///
///  - 'q' toggles should_close (quit request observed by main()).
///  - 'p' toggles should_stop (pause flag observed by run_tesk()).
///
/// Returns when stdin reaches end-of-file or fails; otherwise getchar() would
/// keep returning EOF immediately and this loop would spin without blocking.
void waitkey()
{
    for (;;) {
        const int key = getchar();
        if (key == EOF) {
            return;
        }
        if (key == 'q') should_close = !should_close;
        if (key == 'p') should_stop = !should_stop;
    }
}
// Creates the logged-in Site handle for the bot.
// The incoming handle value is overwritten; the result of the
// LOGIN_TO_ZH_WIKIPEDIA macro is assigned to it and returned.
// NOTE(review): LOGIN_TO_ZH_WIKIPEDIA is defined in a header omitted from this
// listing — presumably it expands to the Site construction with credentials.
Site^ login(Site^ zhWiki) {
zhWiki = LOGIN_TO_ZH_WIKIPEDIA;
return zhWiki;
}
/// Appends a bullet line to a wiki page whose title, content and edit summary
/// are all read from configuration pages.
///
///  - "User:A2569875/bot-page"    : its text is the title of the page to edit.
///  - "User:A2569875/bot-edit"    : its text is appended as "\n*<text>".
///  - "User:A2569875/bot-Summary" : its text is used as the edit summary.
///
/// @param zhWiki Site on which all pages are loaded and saved.
void create_user_page(Site^ zhWiki) {
    // Resolve the title of the page to edit.
    Page^ title_source = gcnew Page(zhWiki, "User:A2569875/bot-page");
    title_source->Load();

    Page^ target = gcnew Page(zhWiki, title_source->text);
    Page^ content_source = gcnew Page(zhWiki, "User:A2569875/bot-edit");
    Page^ summary_source = gcnew Page(zhWiki, "User:A2569875/bot-Summary");

    target->Load();
    content_source->Load();
    summary_source->Load();

    // Add the configured text as a new list item at the end of the page.
    System::String^ appended_line = "\n*" + content_source->text;
    target->text = target->text + appended_line;
    target->Save(summary_source->text, false);
}
/// Processes the candidate page at index load_id: for every CAS field found
/// on that page, creates a redirect titled with the CAS number that points
/// back to the page.
///
/// A redirect is created only when
///  - the CAS number passes check_cas_no(),
///  - no page with that title exists yet, and
///  - the source page is in neither "Category:未提供參考文獻的CAS號" nor
///    "Category:含有未校對CAS號的條目".
/// After a redirect is saved the source page is saved as well, with the same
/// summary. The processed entry is then removed from page_result.
///
/// When page_result is empty the list is refilled from the wiki categories,
/// skipping pages that appear in either blacklist category.
///
/// Side effects on globals: reads load_id; sets created_page (true only when
/// at least one redirect was saved); increments added_count per redirect.
/// This function never modifies should_close, so a quit request made from
/// the keyboard stays pending.
///
/// @param zhWiki      Site used for all page loads and saves.
/// @param page_result Candidate page titles; modified in place.
void create_cas_redirect(Site^ zhWiki, System::Collections::Generic::List<System::String^>^ page_result) {
    created_page = false;
    if (page_result->Count > 0) {
        std::cout << "read id = " << load_id << std::endl;
        System::String^ target_title = page_result[load_id];
        Page^ the_page = gcnew Page(zhWiki, target_title);
        // Matches "<field name> = <cas number>". Fields named "CASOther" are
        // deliberately not matched (they appear not to have been proofread yet).
        System::Text::RegularExpressions::Regex^ cas_field_regex = gcnew System::Text::RegularExpressions::
            Regex("([Cc][Aa][Ss][Nn][Oo][0-9]*|[Cc][Aa][Ss]([Nn][Oo][Ss])?|[Cc][Aa][Ss][號\\u53f7]|[Cc][Aa][Ss][ _]?[Nn]umber[ _]?[0-9]*)[ \\t\\n\\r]*\\=[ \\t\\n\\r]*[1-9]\\d{1,6}-\\d\\d-\\d");
        // Extracts the bare number from a matched field; compiled once.
        const std::regex cas_regex("[0-9]+-[0-9][0-9]-[0-9]");
        System::String^ unreferenced_category = gcnew System::String("Category:未提供參考文獻的CAS號");
        System::String^ unverified_category = gcnew System::String("Category:含有未校對CAS號的條目");

        the_page->Load();
        System::Text::RegularExpressions::MatchCollection^ matches = cas_field_regex->Matches(the_page->text);
        for each (System::Text::RegularExpressions::Match^ match in matches)
        {
            std::string match_string;
            if (match->Success) {
                MarshalString(match->Value, match_string);
            }
            std::cout << match_string << std::endl;
            if (match_string.find('=') == std::string::npos) {
                continue;
            }
            std::smatch cas_match;
            if (!std::regex_search(match_string, cas_match, cas_regex)) {
                continue;
            }
            // The pattern has no capture groups, so the whole match is the
            // number. It starts and ends with a digit, so no trimming is needed.
            const std::string the_casno = cas_match.str();

            // Skip invalid CAS numbers.
            if (!check_cas_no(the_casno)) {
                std::cout << "Error: " << the_casno << " is an invalid CAS No." << std::endl;
                continue;
            }

            System::String^ cas_title = gcnew System::String(the_casno.c_str());
            Page^ the_redirect_page = gcnew Page(zhWiki, cas_title);
            the_redirect_page->Load();
            if (the_redirect_page->Exists()) {
                std::cout << "Page " << the_casno << " exist!!" << std::endl;
                continue;
            }

            // Re-check the source page's categories right before creating.
            System::Collections::Generic::List<System::String^>^ cats = the_page->GetAllCategories();
            bool should_create_the_r = true;
            for each (System::String^ category_name in cats)
            {
                if (unreferenced_category->Equals(category_name) || unverified_category->Equals(category_name)) {
                    should_create_the_r = false;
                    break;
                }
            }

            if (should_create_the_r) {
                System::String^ edit_summary = L"[[Wikipedia:机器人/申请/A2569875-bot|機器人]],[[Special:Diff/47443111|建立CAS重定向]]:[[" + cas_title + "]] → [[" + target_title + "]]([[User:A2569875-bot#建立CAS號重定向|任務]])";
                the_redirect_page->text = "#重定向 [[" + target_title + "]]\n{{CAS號重定向}}";
                the_redirect_page->Save(edit_summary, true);
                // The source page is saved too, with the same summary.
                the_page->Save(edit_summary, true);
                ++added_count;
                std::cout << "create " << added_count << " pages." << std::endl;
                created_page = true;
            }
            else {
                std::cout << "Page " << the_casno << " is in the Category:未提供參考文獻的CAS號!!" << std::endl;
            }
        }
        page_result->RemoveAt(load_id);
    }
    else {
        // Refill the candidate list, applying the same blacklist filter that
        // main() and run_tesk() use.
        System::Collections::Generic::List<System::String^>^ page_result_pre = zhWiki->getPageNamesFromCategory("無CAS號重定向的物質條目", 5000);
        System::Collections::Generic::List<System::String^>^ page_black_list = zhWiki->getPageNamesFromCategory("未提供參考文獻的CAS號", 5000);
        System::Collections::Generic::List<System::String^>^ page_black_list2 = zhWiki->getPageNamesFromCategory("含有未校對CAS號的條目", 5000);
        for each (System::String^ page_name_it in page_result_pre) {
            bool can_edit_red = true;
            for each (System::String^ page_name_itB in page_black_list) {
                if (System::String::Compare(page_name_it, page_name_itB, System::StringComparison::OrdinalIgnoreCase) == 0) {
                    std::cout << "Page " << page_name_itB << " is in the Category:未提供參考文獻的CAS號!!" << std::endl;
                    can_edit_red = false;
                    break;
                }
            }
            if (can_edit_red) {
                for each (System::String^ page_name_itB in page_black_list2) {
                    if (System::String::Compare(page_name_it, page_name_itB, System::StringComparison::OrdinalIgnoreCase) == 0) {
                        std::cout << "Page " << page_name_itB << " is in the Category:含有未校對CAS號的條目!!" << std::endl;
                        can_edit_red = false;
                        break;
                    }
                }
            }
            if (can_edit_red) {
                page_result->Add(page_name_it);
            }
        }
        std::cout << "found page :";
        for each (System::String^ page_name_it in page_result) std::cout << page_name_it << ",";
        std::cout << std::endl;
    }
}
// Intentionally empty: this definition does nothing. The program's work is
// started from the free function main() in this file.
void MyBot::Main() {
}