跳转到内容

英文维基 | 中文维基 | 日文维基 | 草榴社区

用户:Antigng-bot/fileresizer

维基百科,自由的百科全书
#include <stdio.h>
#include <io.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <windows.h>
#include <process.h>
#include "network.h"
#include "auth.h"
#include "convert.h"
#include "struct.h"
#include "misc.h"
#define VIPS_PATH "E:\\Antigng-bot\\file\\vips-dev-8.4\\bin\\"
struct problemlist
{
	char *title;
	int pageid;
	struct problemlist *next;
};
/* Arguments handed to smartedit() describing one page edit. */
struct neditargv
{
	/* Not read by any code visible in this file. */
    int count;
	/* Page id as a decimal string; inserted into the URL as pageid=... */
	char *id;
	/* Timestamp string; inserted into the URL as basetimestamp=... */
	char *time;
	/* HTTP buffer holding the new page text; rewound and posted by smartedit(). */
	HTTP newtext;
};
/* Reader/writer lock taken in this file around every access to hastoken and token. */
SRWLOCK rwcs;
/* Critical sections initialised and deleted in main(); not otherwise used in the code visible here. */
CRITICAL_SECTION tcs;
CRITICAL_SECTION fcs;
CRITICAL_SECTION hcs;
/* Head of the list of pages to process; filled by filequery(), emptied by clearpbl(). */
struct problemlist *pbl=0;
/* Mode switch read by main(): 1 selects "action mode". */
int ACTION=1;
/* Free every node (and its title) of the global list pbl, leaving pbl empty. */
static void clearpbl()
{
	struct problemlist *node=pbl;
	while(node)
	{
		struct problemlist *following=node->next;
		free(node->title);
		free(node);
		node=following;
	}
	pbl=0;
}
/*
 * Collect the members of a category into the global list pbl.
 *
 * target: category title, inserted verbatim into the query string, so the
 *         caller must pass it already percent-encoded.
 * ns:     namespace number passed as cmnamespace.
 *
 * Each <cm> element with a positive pageid is pushed onto the front of pbl.
 * The query is repeated with cmcontinue for as long as the response carries a
 * <continue> element.
 *
 * Returns 0 on success, 1 if the target is too long, if no 200 response was
 * obtained within 20 attempts, or if memory ran out. Nodes added before a
 * failure stay in pbl.
 */
static int filequery(const char *target,int ns)
{
	HTTP f;
	char line[2048]={0},url[4096]={0},snd[4096]={0},id[512]={0},title[512]={0},sroffset[2048]={0},offseto[512]={0};
	char statusline[128]={0};
	int next=0,retry=0,pageid=0;
	struct problemlist *temp=0;
	char *ctm[]={"cmcontinue"};
	char *ctv[1];
	char *idm[]={"pageid","title"};
	char *idv[2];
	ctv[0]=offseto;
	idv[0]=id;
	idv[1]=title;
	/* url and snd are 4096 bytes; keep room for the fixed query text and the continuation offset. */
	if(strlen(target)>2048) return 1;
	sprintf(url,"/w/api.php?action=query&format=xml&list=categorymembers&cmnamespace=%d&cmlimit=5000&cmtitle=%s",ns,target);
	do
	{
		strcpy(snd,url);
		if(next)
		{
			strcat(snd,"&cmcontinue=");
			strcat(snd,sroffset);
		}
		f=hopen();
		for(retry=0;retry<20;retry++)
		{
			if(get(snd,8888,1,f))
			{
				/* Request failed: start over with a fresh handle. */
				hclose(f);
				f=hopen();
			}
			else
			{
				hgets(statusline,127,f);
				if(strstr(statusline,"200")) break;
			}
		}
		if(retry==20)
		{
			hclose(f);
			return 1;
		}
		skipresponseheader(f);
		next=0;
		do
		{
			xmlparsetag(f,line);
			if(!next&&!strcmp(line,"continue"))
			{
				/* Clear first so a missing attribute cannot reuse the previous offset. */
				offseto[0]=0;
				xmlparsearg(f,1,ctm,ctv);
				if(offseto[0])
				{
					URLEncode(offseto,strlen(offseto),sroffset,990);
					next=1;
				}
			}
			if(!strcmp(line,"cm"))
			{
				/* Clear first so a missing attribute cannot reuse the previous entry. */
				id[0]=0;
				title[0]=0;
				xmlparsearg(f,2,idm,idv);
				if((pageid=atoi(id))>0)
				{
					temp=(struct problemlist *)malloc(sizeof(struct problemlist));
					if(!temp)
					{
						hclose(f);
						return 1;
					}
					temp->title=(char *)malloc(strlen(title)+1);
					if(!temp->title)
					{
						free(temp);
						hclose(f);
						return 1;
					}
					strcpy(temp->title,title);
					temp->pageid=pageid;
					temp->next=pbl;
					pbl=temp;
				}
			}
		}while(!heof(f));
		hclose(f);
	}while(next);
	return 0;
}
/*
 * Submit an edit for the page described by p through action=edit.
 *
 * p:      page id string, base timestamp and the HTTP buffer with the new text.
 * reason: edit summary, or NULL for an empty summary.
 * tags:   change tags, or NULL to send none.
 *
 * The request is repeated (at most 3 attempts in total) when the API reports
 * notoken/badtoken or abusefilter-warning. On a token error, hastoken is
 * cleared only if the global token is still the one this request used.
 *
 * Returns 0 on success, -1 if smartpost() failed, -2 on a non-200 status line,
 * -3 if the last response contained an <error> element, -4 if the request
 * tail could not be formatted.
 */
static int smartedit(struct neditargv *p,const char *reason, const char *tags)
{
	HTTP res;
	char line[2048],url[4096]={0};
	/* Zero-initialised so a NULL reason/tags yields an empty string rather than garbage. */
	char reason_e[512]={0};
	char tags_e[256]={0};
	char aft[1024],statusline[128];
	char err_type[8192];
	char *erm[]={"code"};
	char *erv[1];
	int find=0;
	int has_err=0,token_err=0,filtered=0;
	int retry=0;
	erv[0]=err_type;
	if(reason) URLEncode(reason,strlen(reason),reason_e,510);
	if(tags) URLEncode(tags,strlen(tags),tags_e,254);
	sprintf(url,"/w/api.php?action=edit&pageid=%s&basetimestamp=%s",p->id,p->time);
	/* find is the length of the fixed tail; the token is appended at that offset on every attempt. */
	find=sprintf(aft,"%s%s&summary=%s&bot=1&minor=1&nocreate=1&format=xml&token=",tags?"&tags=":"",tags?tags_e:"",reason_e);
	if(find<0) return -4;
	do
	{
		res=hopen();
		/* Poll until a token is available; the shared lock is still held when the loop exits. */
		while(1)
		{
			AcquireSRWLockShared(&rwcs);
			if(hastoken) break;
			else ReleaseSRWLockShared(&rwcs);
			Sleep(100);
		}
		aft[find]=0;
		strcat(aft,token);
		ReleaseSRWLockShared(&rwcs);
		hrewind(p->newtext);
		if(smartpost(url,p->newtext,aft,8888,1,res))
		{
			hclose(res);
			return -1;
		}
		hgets(statusline,127,res);
		if(!strstr(statusline," 200"))
		{
			hclose(res);
			return -2;
		}
		skipresponseheader(res);
		filtered=token_err=has_err=0;
		while(!heof(res))
		{
			if(xmlparsetag(res,line)==XML_HAS_VALUE)
			{
				if(!strcmp(line,"error"))
				{
					has_err=1;
					err_type[0]=0;
					xmlparsearg(res,1,erm,erv);
					if((!strcmp(err_type,"notoken"))||(!strcmp(err_type,"badtoken")))
					{
						token_err=1;
					}
					else if(!strcmp(err_type,"abusefilter-warning"))
					{
						filtered=1;
					}
					break;
				}
			}
		}
		if(token_err)
		{
			AcquireSRWLockExclusive(&rwcs);
			/* aft+find is the token this request sent; invalidate only if it is still current. */
			if(!strcmp(aft+find,token)) hastoken=0;
			ReleaseSRWLockExclusive(&rwcs);
		}
		retry++;
		hclose(res);
	}while((token_err||filtered)&&(retry<3));
	if(has_err) return -3;
	else return 0;
}
/*
 * Send an action=purge request (with forcelinkupdate=1) for one page id.
 * Returns 1 if post() reported success, 0 if it reported failure.
 */
static int purge(int pageid)
{
	char request[3000];
	char emptybody[1]={0};
	HTTP conn;
	int ok;
	sprintf(request,"/w/api.php?pageids=%d&action=purge&forcelinkupdate=1",pageid);
	conn=hopen();
	ok=post(request,emptybody,8888,1,conn)?0:1;
	hclose(conn);
	return ok;
}

/*
 * Extract the lower-cased file extension (text after the last '.') of title.
 *
 * title:     NUL-terminated page title.
 * extension: pointer to the caller's output buffer pointer; the buffer must
 *            hold at least 8 bytes (7 characters plus the terminator).
 *
 * Only ASCII 'A'-'Z' are folded to lower case. Returns 1 and fills the buffer
 * on success. Returns 0, leaving the buffer untouched, when the title has no
 * '.', nothing follows the last '.', or the extension is longer than 7
 * characters.
 */
static int getextension(const char *title,char *extension[])
{
	enum { MAX_EXT=7 };
	char ext[MAX_EXT];
	int count=0;
	int backcount=0;
	int founddot=0;
	int pos=(int)strlen(title)-1;
	char ch;
	/* Walk backwards from the end, collecting characters until the last '.'. */
	while(pos>=0)
	{
		ch=title[pos];
		if(ch=='.')
		{
			founddot=1;
			break;
		}
		/* Too long to fit the callers' 8-byte buffers: not an extension we handle. */
		if(count==MAX_EXT) return 0;
		ext[count]=(ch>='A'&&ch<='Z')?(ch+'a'-'A'):ch;
		count++;
		pos--;
	}
	if(!founddot||count==0) return 0;
	/* ext holds the extension reversed; copy it out in reading order. */
	for(backcount=0;backcount<count;backcount++) (*extension)[backcount]=ext[count-1-backcount];
	(*extension)[backcount]=0;
	return 1;
}
/*
 * Report whether title ends in one of the supported raster image extensions
 * (jpeg, jpg, bmp, gif, png; compared after getextension() lower-cases it).
 * Returns 1 if supported, 0 otherwise.
 */
static int checkextension(const char *title)
{
	static const char *const supported[]={"jpeg","jpg","bmp","gif","png"};
	/* 16 bytes: large enough for the longest string getextension() can produce. */
	char ext[16],*ptoext=ext;
	size_t i;
	if(!getextension(title,&ptoext)) return 0;
	for(i=0;i<sizeof(supported)/sizeof(supported[0]);i++)
	{
		if(!strcmp(ext,supported[i])) return 1;
	}
	return 0;
}
/*
 * Classify an image by pixel count (height * width).
 * Returns 1 when it exceeds 200000 pixels, 0 when it is below 105000 pixels,
 * and -1 for everything in between (both limits inclusive).
 * The product is computed in 64 bits so very large images cannot overflow.
 */
static int judgesize(int height,int width)
{
	const long long pixels=(long long)height*width;
	if(pixels>200000) return 1;
	if(pixels<105000) return 0;
	return -1;
}

/*
 * Query the imageinfo of a file page and classify its dimensions.
 *
 * filepageid: page id of the file page.
 * height, width: receive the parsed dimensions; both are reset to 0 once the
 *                response headers have been skipped.
 *
 * Returns 1 when judgesize() says the image is too large, 2 when it is small
 * enough, 0 when it is in between. Returns -1 if the request failed, -2 if
 * skipresponseheader() failed, -3 if no <ii> element was found or the
 * dimensions are not positive. The last case keeps an unparsable response
 * from being treated as a small image.
 */
static int checksize(int filepageid,int *height,int *width)
{
	char url[256];
	HTTP h;
	char line[1024]={0};
	char *iim[]={"user","width","height"};
	char *iiv[3];
	/* Zero-initialised so atoi() never reads indeterminate bytes. */
	char cht[32]={0},cwh[32]={0},cu[1024]={0};
	int success=0;
	iiv[0]=cu;
	iiv[1]=cwh;
	iiv[2]=cht;
	sprintf(url,"/w/api.php?action=query&format=xml&prop=imageinfo&iiprop=size|user&pageids=%d",filepageid);
	h=hopen();
	if(get(url,8888,0,h))
	{
		hclose(h);
		return -1;
	}
	if(skipresponseheader(h))
	{
		hclose(h);
		return -2;
	}
	*height=*width=0;
	while(!heof(h))
	{
		xmlparsetag(h,line);
		if(!strcmp(line,"ii"))
		{
			/* Only the first <ii> element is considered. */
			xmlparsearg(h,3,iim,iiv);
			*height=atoi(cht);
			*width=atoi(cwh);
			success=1;
			break;
		}
	}
	hclose(h);
	if(!success||*height<=0||*width<=0) return -3;
	switch(judgesize(*height,*width))
	{
	case -1:
		return 0;
	case 0:
		return 2;
	case 1:
		return 1;
	default:
		return -1;
	}
}
/* Category sets consulted by checknonfree(); both are created and filled in ini_marks(). */
struct hashlist *fair_use_cat=NULL;
struct hashlist *exempt_cat=NULL;
/*
 * Look at the categories of a page and decide whether it should be handled.
 *
 * Each <cl> element's title is looked up first in fair_use_cat, then in
 * exempt_cat. A hit in fair_use_cat sets the result to 1 and scanning goes on.
 * A hit in exempt_cat forces the result to 0 and stops scanning.
 *
 * Returns 1 or 0 as described, -1 if the request failed, -2 if
 * skipresponseheader() failed.
 */
static int checknonfree(int filepageid)
{
	char request[4096];
	char category[2048];
	char tag[1024];
	char *attrnames[]={"title"};
	char *attrvalues[1];
	HTTP resp;
	int verdict=0;
	void *found;
	attrvalues[0]=category;
	sprintf(request,"/w/api.php?action=query&format=xml&prop=categories&cllimit=500&pageids=%d",filepageid);
	resp=hopen();
	if(get(request,8888,0,resp))
	{
		hclose(resp);
		return -1;
	}
	if(skipresponseheader(resp))
	{
		hclose(resp);
		return -2;
	}
	while(!heof(resp))
	{
		xmlparsetag(resp,tag);
		if(strcmp(tag,"cl")!=0) continue;
		xmlparsearg(resp,1,attrnames,attrvalues);
		if(str_hashquery(fair_use_cat,category,&found))
		{
			verdict=1;
			continue;
		}
		if(str_hashquery(exempt_cat,category,&found))
		{
			verdict=0;
			break;
		}
	}
	hclose(resp);
	return verdict;
}
/*
 * Copy title into filename without its first 5 characters (the length of a
 * "File:" prefix), replacing every space with an underscore.
 *
 * filename must be large enough for the result; no bound is checked here.
 * A title shorter than 5 characters yields an empty string instead of reading
 * past the end of title.
 */
static void stripfileprefix(const char *title, char *filename)
{
	enum { PREFIX_LEN=5 };
	size_t i;
	filename[0]=0;
	if(strlen(title)<(size_t)PREFIX_LEN) return;
	title+=PREFIX_LEN;
	for(i=0;title[i];i++)
	{
		filename[i]=(title[i]==' ')?('_'):title[i];
	}
	filename[i]=0;
	return;
}
/*
 * Append one part of a multipart form to h.
 *
 * name:       field name; when type is nonzero it is used as the filename
 *             and the field itself is named "file".
 * boundary:   boundary string, written after a leading "--".
 * data:       payload of datalength bytes.
 * type:       nonzero for a binary file part (payload sent with hwrite),
 *             zero for a text part (payload sent with hputs).
 *
 * Lines are terminated with '\n' only.
 */
static void formappendcontent(const char *name,const char *boundary,const char *data,int datalength,int type,HTTP h)
{
	char encoded[2048];
	char header[2048];
	URLEncode(name,strlen(name),encoded,1023);
	/* Opening delimiter line. */
	hputs("--",2,h);
	hputs(boundary,strlen(boundary),h);
	WRITE('\n',h);
	/* Part headers, ending in the blank line that precedes the payload. */
	if(!type)
	{
		sprintf(header,"Content-Disposition: form-data; name=\"%s\"\nContent-Type: text/plain; charset=UTF-8\nContent-Transfer-Encoding: 8bit\n\n",encoded);
	}
	else
	{
		sprintf(header,"Content-Disposition: form-data; name=\"file\";filename=\"%s\"\nContent-Type: application/octet-stream; charset=UTF-8\nContent-Transfer-Encoding: binary\n\n",encoded);
	}
	hputs(header,strlen(header),h);
	/* Payload. */
	if(!type)
	{
		hputs(data,datalength,h);
	}
	else
	{
		hwrite(data,datalength,h);
	}
	WRITE('\n',h);
}
/* Write the closing multipart delimiter: "--", the boundary, "--", then '\n'. */
static void formappendfinal(const char *boundary,HTTP h)
{
	const char *dashes="--";
	hputs(dashes,2,h);
	hputs(boundary,strlen(boundary),h);
	hputs(dashes,2,h);
	WRITE('\n',h);
}
static int upload(const char *title,const char *newfile,int length,const char *reason)
{
	HTTP h,res;
	char url[4096];
	char line[8192];
	char tt[1024];
	char filename[1024];
	char raw_token[256];
	char reason_e[512];
	const char *boundary="--------------antigng_is_masturbating--------------";
	stripfileprefix(title,filename);
	h=hopen();
	formappendcontent(filename,boundary,newfile,length,1,h);
	while(1)
	{
		AcquireSRWLockShared(&rwcs);
		if(hastoken) break;
		else ReleaseSRWLockShared(&rwcs);
		Sleep(100);
	}		
	URLtryDecode(token,strlen(token),raw_token,128,1);
	ReleaseSRWLockShared(&rwcs);
	formappendcontent("token",boundary,raw_token,strlen(raw_token),0,h);
	formappendfinal(boundary,h);
	res=hopen();
	URLEncode(title,strlen(title),tt,990);
	URLEncode(reason,strlen(reason),reason_e,510);
	sprintf(url,"/w/api.php?action=upload&format=xml&filename=%s&comment=%s&ignorewarnings=1",tt,reason_e);
	if(postform(url,h,boundary,8888,1,res))
	{
		hclose(res);
		hclose(h);
		return -1;
	}
	else
	{
		int suc=0;
		char *mtn[]={"result"};
		char *mtv[1];
		char resstr[1024];
		mtv[0]=resstr;
		hclose(h);
		hgets(line,128,res);
		if(!strstr(line,"200 "))
		{
			hclose(res);
			return -2;
		}
		if(skipresponseheader(res))
		{
			hclose(res);
			return -3;
		}
		while(!heof(res))
		{
			xmlparsetag(res,line);
			if(!strcmp(line,"upload"))
			{
				xmlparsearg(res,1,mtn,mtv);
				suc=!strcmp(resstr,"Success");
				break;
			}
		}
		hclose(res);
		return suc;
	}
}
/*
 * Stream the page text from h, drop every occurrence of {{non-free reduce}}
 * and, if at least one was dropped, save the result with smartedit().
 *
 * pageid:    page to edit.
 * reason:    edit summary.
 * timestamp: base timestamp passed on to smartedit().
 * h:         response stream positioned at the text to read; characters are
 *            pulled with xmlpulltext() until it stops returning
 *            XML_TEXT_CONTINUE.
 *
 * Each character is matched against the lower-case or the upper-case spelling
 * of the template, so mixed case is accepted. Text that does not belong to a
 * full match is forwarded through smartURLEncode() unchanged, including a
 * partial match still pending when the text ends.
 *
 * Returns 1 if the template was found (an edit was attempted; its result is
 * not reported), 0 otherwise.
 */
static int doremovetemp(int pageid,char *reason,char *timestamp, HTTP h)
{
	const char *match="{{non-free reduce}}";
	const char *MATCH="{{NON-FREE REDUCE}}";
	const int mtl=(int)strlen(match);
	char buffer[64];	/* holds the current partial match; always shorter than mtl */
	int bfl=0;		/* number of pattern characters matched so far */
	int i=0;
	HTTP res;
	char ch;
	int matched=0;
	res=hopen();
	hputs("&text=",strlen("&text="),res);
	while(xmlpulltext(h,&ch)==XML_TEXT_CONTINUE)
	{
		if(ch==match[bfl]||ch==MATCH[bfl])
		{
			buffer[bfl]=ch;
			bfl++;
			if(bfl==mtl)
			{
				/* Full match: discard the buffered template text. */
				bfl=0;
				matched=1;
			}
			continue;
		}
		/*
		 * Mismatch. The only characters that can restart the pattern are
		 * its leading braces, so two cases need care.
		 */
		if(bfl==2&&ch==match[0])
		{
			/* "{{" followed by "{": emit one brace and keep "{{" pending. */
			smartURLEncode(buffer[0],res);
			continue;
		}
		for(i=0;i<bfl;i++)
		{
			smartURLEncode(buffer[i],res);
		}
		bfl=0;
		if(ch==match[0])
		{
			/* The mismatching character may itself open a template. */
			buffer[0]=ch;
			bfl=1;
		}
		else
		{
			smartURLEncode(ch,res);
		}
	}
	/* Flush a partial match left pending at the end of the text. */
	for(i=0;i<bfl;i++)
	{
		smartURLEncode(buffer[i],res);
	}
	if(matched)
	{
		struct neditargv p;
		char ids[256]={0};
		sprintf(ids,"%d",pageid);
		p.count=0;
		p.id=ids;
		p.newtext=res;
		p.time=timestamp;
		smartedit(&p,reason,0);
		hclose(res);
		return 1;
	}
	else
	{
		hclose(res);
		return 0;
	}
}
/*
 * Fetch the latest revision of a page and hand its text to doremovetemp().
 *
 * querypageid: page id to query.
 * reason:      edit summary forwarded to doremovetemp().
 *
 * Returns -1 if the request failed, -2 if skipresponseheader() failed, and 0
 * once the response has been parsed. The result of doremovetemp() is not
 * reflected in the return value.
 */
static int removetemp(int querypageid,char *reason)
{
	char url[4096];
	char buf[8192];
	char pageid[256];
	char timestamp[256];
	char contentmodel[64],contentformat[64];
	const char *ttm[]={"pageid"};
	const char *tmm[]={"timestamp"};
	const char *cmm[]={"contentmodel","contentformat"};
	char *ttv[1];
	char *tmv[1];
	char *cmv[2];
	int result;
	int status;
	HTTP h;
	ttv[0]=pageid;
	tmv[0]=timestamp;
	cmv[0]=contentmodel;
	cmv[1]=contentformat;
	sprintf(url,"/w/api.php?action=query&format=xml&prop=revisions&rvprop=content|timestamp&pageids=%d&rvslots=main",querypageid);
	h=hopen();
	if(get(url,8888,1,h))
	{
		hclose(h);
		return -1;
	}
	if(skipresponseheader(h))
	{
		hclose(h);
		return -2;
	}
	/*
	 * Parser state: 0 = looking for <page>, 1 = looking for <rev>,
	 * 2 = looking for <slot>.
	 */
	status=0;
	while(!heof(h))
	{
		result=xmlparsetag(h,buf);
		switch(status)
		{
		case 0:
			if(result==XML_HAS_VALUE&&!strcmp(buf,"page"))
			{
				if(xmlparsearg(h,1,ttm,ttv)==XML_HAS_VALUE)
				{
					/* Advance only when the page element carries a nonzero pageid. */
					if(atoi(pageid)) status=1;
				}
			}
			break;
		case 1:
			if(result==XML_HAS_VALUE&&!strcmp(buf,"rev"))
			{
				/* The revision timestamp is later passed on as the edit's base timestamp. */
				xmlparsearg(h,1,tmm,tmv);
				status=2;
			}
			break;
		case 2:
			if(result==XML_HAS_VALUE&&!strcmp(buf,"slot"))
			{
				xmlparsearg(h,2,cmm,cmv);
				/* Only plain wikitext slots are edited; doremovetemp() reads the slot text from h. */
				if(!strcmp(contentmodel,"wikitext")&&!strcmp(contentformat,"text/x-wiki")) 
					doremovetemp(querypageid,reason,timestamp,h);
				status=0;
			}
			break;
		}
	}
	hclose(h);
	return 0;
}

static int newsize(int height,int width)
{
	return (int)sqrt(100*1024*width/height);
}
/*
 * Ask the API (prop=imageinfo, iiprop=url) for the URL of a file.
 *
 * title:     page title; URL-encoded before being placed in the query.
 * image_url: output buffer for the "url" attribute of the first <ii> element;
 *            its size is not checked here.
 *
 * Returns 0 on success, -1 if the request failed, -2 if skipresponseheader()
 * failed, -3 if no <ii> element with a url value was found.
 */
static int fetchimageurl(const char *title, char *image_url)
{
	char encoded[2048];
	char request[4096];
	char tag[4096];
	char *attrnames[]={"url"};
	char *attrvalues[1];
	int found=0;
	HTTP resp;
	attrvalues[0]=image_url;
	resp=hopen();
	URLEncode(title,strlen(title),encoded,2046);
	sprintf(request,"/w/api.php?action=query&format=xml&prop=imageinfo&iiprop=url&titles=%s",encoded);
	if(get(request,8888,0,resp))
	{
		hclose(resp);
		return -1;
	}
	if(skipresponseheader(resp))
	{
		hclose(resp);
		return -2;
	}
	while(!heof(resp))
	{
		xmlparsetag(resp,tag);
		if(strcmp(tag,"ii")!=0) continue;
		/* Only the first <ii> element is examined. */
		found=(xmlparsearg(resp,1,attrnames,attrvalues)==XML_HAS_VALUE);
		break;
	}
	hclose(resp);
	return found?0:-3;
}
/*
 * Download url with get_b() and store the response body in filename.
 *
 * Returns 0 on success, -1 if the request failed, -2 on a status line without
 * "200 ", -3 if skipresponseheader() failed, -4 if the file could not be
 * opened, -5 if writing or closing the file failed.
 */
static int fetchimagebyurl(const char *url,const char *filename)
{
	HTTP h;
	FILE *fp=NULL;
	/* 128 bytes for a 127-character read, the convention used elsewhere in this file. */
	char statusline[128];
	int failed=0;
	h=hopen();
	if(get_b(url,8080,0,h))
	{
		hclose(h);
		return -1;
	}
	hgets(statusline,127,h);
	if(!strstr(statusline,"200 "))
	{
		hclose(h);
		return -2;
	}
	if(skipresponseheader(h))
	{
		hclose(h);
		return -3;
	}
	if((fp=fopen(filename,"wb+"))==NULL)
	{
		hclose(h);
		return -4;
	}
	else
	{
		char ch;
		while(!heof(h))
		{
			ch=hgetc(h);
			/* Stop at the first write error; it is reported below through ferror(). */
			if(fputc(ch,fp)==EOF) break;
		}
	}
	hclose(h);
	failed=ferror(fp);
	/* fclose() flushes buffered data, so its failure also means an incomplete file. */
	if(fclose(fp)) failed=1;
	return failed?-5:0;
}
/*
 * Return 1 if ext is exactly "JPG", "jpg", "JPEG" or "jpeg", otherwise 0.
 * Mixed-case spellings such as "Jpg" are not recognised.
 */
static int isjpeg(const char *ext)
{
	static const char *const spellings[]={"JPG","jpg","JPEG","jpeg"};
	size_t i;
	for(i=0;i<sizeof(spellings)/sizeof(spellings[0]);i++)
	{
		if(strcmp(ext,spellings[i])==0) return 1;
	}
	return 0;
}
/*
 * Download the file behind title, shrink it with vipsthumbnail and load the
 * result into memory.
 *
 * title:         page title; its extension selects the working file names
 *                "in.<ext>" and "out.<ext>" in the current directory.
 * height, width: current dimensions of the image.
 * newimage:      receives a calloc'ed 1 MiB buffer with the resized file, or
 *                NULL; the caller frees it.
 * length:        receives the number of bytes read, or 0.
 *
 * Returns 1 on success and 0 if the resized file does not fit in the buffer.
 * Failures: -1 no extension, -2 URL lookup failed, -3 download failed,
 * -4 stale output file could not be removed, -5 output file could not be
 * opened, -6 non-positive dimensions, -7 out of memory.
 */
static int vips_rescaleimage(const char *title,int height,int width,char **newimage,int *length)
{
	enum { IMAGE_BUF=1024*1024 };
	char url[8192];
	char cmd_line[1024];
	char infilename[32];
	char outfilename[32];
	/* 16 bytes: large enough for the longest string getextension() can produce. */
	char outfileext[16],*ptooutfileext=outfileext;
	FILE *outfile;
	int newscale=0;
	int imagelen=0;
	int setquality=0;
	*length=0;
	*newimage=NULL;
	/* The scale computation below divides by both dimensions. */
	if(height<=0||width<=0)
	{
		return -6;
	}
	if(!getextension(title,&ptooutfileext))
	{
		return -1;
	}
	if(isjpeg(outfileext)) setquality=1;
	if(fetchimageurl(title,url))
	{
		return -2;
	}
	sprintf(infilename,"in.%s",outfileext);
	if(fetchimagebyurl(url,infilename))
	{
		return -3;
	}
	sprintf(outfilename,"out.%s",outfileext);
	/* Remove a leftover output file so a failed conversion cannot be mistaken for a fresh result. */
	if(!_access(outfilename,0))
	{
		if(remove(outfilename)) return -4;
	}
	/* newsize() yields the target width; portrait images pass the matching height instead. */
	newscale=height<width?newsize(height,width):newsize(height,width)*height/width;
	sprintf(cmd_line,VIPS_PATH"vipsthumbnail -s %d -p nohalo -f %s%s %s",newscale,outfilename,setquality==1?"[Q=95]":"",infilename);
	printf("%s\n",cmd_line);
	system(cmd_line);
	if((outfile=fopen(outfilename,"rb+"))==NULL)
	{
		return -5;
	}
	*newimage=(char *)calloc(IMAGE_BUF,1);
	if(*newimage==NULL)
	{
		fclose(outfile);
		return -7;
	}
	imagelen=(int)fread(*newimage,1,IMAGE_BUF,outfile);
	fclose(outfile);
	/* A completely filled buffer means the file may be larger than the buffer. */
	if(imagelen>=IMAGE_BUF)
	{
		free(*newimage);
		*newimage=NULL;
		return 0;
	}
	else
	{
		(*newimage)[imagelen]=0;
		*length=imagelen;
		return 1;
	}
}

/*
 * Process every page in the list starting at head.
 *
 * A page is handled only if its extension is supported and checknonfree()
 * returns a positive value. Then, depending on checksize():
 *   1 - the image is resized and uploaded; on a successful upload the
 *       template is removed and the page purged;
 *   2 - the image is already small enough, so only the template is removed.
 * Any other result leaves the page untouched. Always returns 0.
 */
static int proceed(struct problemlist *head)
{
	for(;head;head=head->next)
	{
		char *title=head->title;
		int filepageid=head->pageid;
		int status=0;
		int height=0,width=0,size=0;
		char *newimage=NULL;
		if(checkextension(title)<=0) continue;
		/* Parenthesised so status holds the call's result, not the comparison. */
		if((status=checknonfree(filepageid))<=0) continue;
		status=checksize(filepageid,&height,&width);
		if(status==2)
		{
			removetemp(filepageid,"bot: small enough; removing {{non-free reduce}}");
			continue;
		}
		if(status!=1) continue;
		if(vips_rescaleimage(title,height,width,&newimage,&size)>0)
		{
			/* One-second pauses between the write requests. */
			Sleep(1000);
			if(upload(title,newimage,size,"bot: resizing large non-free images")==1)
			{
				Sleep(1000);
				removetemp(filepageid,"bot: image resized; removing {{non-free reduce}}");
				Sleep(1000);
				purge(filepageid);
			}
		}
		free(newimage);
	}
	return 0;
}

/*
 * Create the two category sets used by checknonfree() and fill them.
 * fair_use_cat lists the categories that mark a file as non-free;
 * exempt_cat lists the categories that exclude a file from processing.
 * Each title passes through G2U() before it is added.
 */
static void ini_marks()
{
	fair_use_cat=hashini();
	exempt_cat=hashini();
	/* "Category:合理使用" used to be added twice; one registration is enough. */
	str_hashadd(fair_use_cat,G2U("Category:合理使用"),NULL);
	str_hashadd(fair_use_cat,G2U("Category:标志"),NULL);
	str_hashadd(fair_use_cat,G2U("Category:電影海報"),NULL);
	str_hashadd(fair_use_cat,G2U("Category:软件屏幕截图"),NULL);
	str_hashadd(fair_use_cat,G2U("Category:电子游戏屏幕截图"),NULL);
	str_hashadd(fair_use_cat,G2U("Category:网页屏幕截图"),NULL);
	str_hashadd(fair_use_cat,G2U("Category:合理使用图像"),NULL);
	str_hashadd(exempt_cat,G2U("Category:文件删除候选"),NULL);
	str_hashadd(exempt_cat,G2U("Category:不应直接缩小的非自由文件"),NULL);
	return;
}
/*
 * Program entry point.
 *
 * After one-time initialisation the bot repeats this cycle:
 *   1. log in, retrying every 600 seconds until login() succeeds;
 *   2. clear hastoken and start the tokenmanage thread;
 *   3. in action mode (ACTION==1): query the category, process the list,
 *      set hastoken to -1, free the list, wait 600 seconds, drop the cookies
 *      and start the cycle again.
 * When ACTION is not 1 there is no active code ("check mode" existed only as
 * commented-out code), so the cycle ends, the critical sections are deleted
 * and the program returns 0.
 */
int main(void)
{
	InitializeSRWLock(&rwcs);
	InitializeCriticalSection(&tcs);
	InitializeCriticalSection(&fcs);
	InitializeCriticalSection(&hcs);
	buckini(20);
	ini_marks();
	for(;;)
	{
		while(login("Antigng-bot@Antigng-bot",""))
		{
			Sleep(1000*600);
		}
		AcquireSRWLockExclusive(&rwcs);
		hastoken=0;
		ReleaseSRWLockExclusive(&rwcs);
		_beginthread(tokenmanage,0,0);
		if(ACTION!=1) break;
		printf("working in action mode\n");
		filequery("Category:%E9%9C%80%E8%A6%81%E7%BC%A9%E5%B0%8F%E5%A4%A7%E5%B0%8F%E7%9A%84%E9%9D%9E%E8%87%AA%E7%94%B1%E6%96%87%E4%BB%B6",6);
		proceed(pbl);
		AcquireSRWLockExclusive(&rwcs);
		hastoken=-1;
		ReleaseSRWLockExclusive(&rwcs);
		clearpbl();
		printf("Ok. Done\n");
		Sleep(1000*600);
		destroycookielist();
	}
	DeleteCriticalSection(&tcs);
	DeleteCriticalSection(&fcs);
	DeleteCriticalSection(&hcs);
	return 0;
}