#!/usr/bin/php
<?php
// NOTE(review): this chunk was recovered from a whitespace-mangled source.
// The original file header — the "<?php" tag, the bootstrap/autoload of the
// Query/DB framework, and the definitions of $EXPORT_FIELDS, MIN_EDITS_RANGE,
// $filename_export and the *_timestamp variables used below — is not visible
// here. The open tag, the function header and the first subquery assignment
// are reconstructed from usage; confirm them against the complete file.

/**
 * Generate the report for one wiki: export the users matching the
 * registration/activity conditions into a CSV file.
 *
 * (Signature reconstructed from the call site at the bottom of the script
 * — TODO confirm.)
 *
 * @param string $wiki_codename Wiki codename like 'itwiki'
 * @param string $wiki_database Wiki database name like 'itwiki_p'
 */
function generate_wmch_report_for_wiki( $wiki_codename, $wiki_database ) {

	// search if the contributor has at least one contribution older than
	// the registration cutoff
	// NOTE(review): the "$subquery_old_contributions =" assignment and the
	// start of this builder chain were truncated in the mangled source;
	// reconstructed from the whereExists( $subquery_old_contributions )
	// usage below — TODO confirm.
	$subquery_old_contributions = ( new Query() )
		->from( 'revision' )
		->equals( 'rev_actor', 'actor_id' )
		->whereInt( 'rev_timestamp', $max_user_registration_timestamp, '<=' );

	// search in an efficient way if the contributor has at least some edits in a period
	// it selects "1" if the "LIMIT 50, 1" returns something
	$subquery_contributions_in_period = ( new Query() )
		->from( 'revision' )
		->equals( 'rev_actor', 'actor_id' )
		->whereInt( 'rev_timestamp', $min_edits_range_timestamp_after,  '>=' )
		->whereInt( 'rev_timestamp', $min_edits_range_timestamp_before, '<=' )
		// Efficient "minimum number of rows" statement:
		// We want to discover if this query has at least MIN_EDITS_RANGE rows,
		// but note that the COUNT(*) function means a full table scan. It's expensive. Don't do that.
		// Why? Because COUNTing *all* the contributions just to discover if they are greater than "$min_edits" is inefficient.
		// Instead, we can use the LIMIT clause to skip the "MIN_EDITS_RANGE - 1" rows and return just the next following row if it exists;
		// so we will return zero or one row depending if this user has at least MIN_EDITS_RANGE contributions or not,
		// and this is freaking efficient because the LIMIT clause uses indexes.
// -- boz
		->limit( MIN_EDITS_RANGE - 1, 1 );

	// the final query to the user table with all the conditions
	$query = ( new Query() )
		->select( $EXPORT_FIELDS )
		->from( 'user' )
		->joinOn( 'INNER', 'actor', 'actor_user', 'user_id' )
		// // this does not work because the user_registration field is NULL in Wikimedia Foundation replicas
		// ->whereInt( 'user_registration', $max_user_registration_timestamp, '<=' )
		// must have at least one old contribution
		->whereExists( $subquery_old_contributions )
		// must have some number of contributions in the specified period
		->whereExists( $subquery_contributions_in_period );

	// select the desired database
	DB::instance()->selectDB( $wiki_database );

	// show the current query
	message( "Executing on $wiki_database: {$query->getQuery()}" );

	// export in a CSV file
	$out = fopen( $filename_export, 'w' );

	// do not run the (expensive) query when the output file cannot be written
	if( $out === false ) {
		message( "Cannot open $filename_export for writing" );
		return;
	}

	// put an heading in the file with the exported field names
	fputcsv( $out, $EXPORT_FIELDS );

	// export the users
	$users = $query->queryGenerator();
	foreach( $users as $user ) {

		// export the user fields in the right order
		$data = [];
		foreach( $EXPORT_FIELDS as $field ) {
			$data[] = $user->{ $field };
		}

		// write a line
		// FIX: was fputcsv( $fp, $data ) — $fp was never defined anywhere;
		// the CSV handle opened above is $out, so every data row was being
		// silently dropped (only the heading was written).
		fputcsv( $out, $data );
	}

	// close the CSV file
	fclose( $out );
}

$languages = [
	'it',
	'fr',
	'en',
	'de',
	'als',
];

$wikis = [
	'wiki',
	'wikisource',
	'wiktionary',
	'wikivoyage',
	'wikibooks',
	'wikiquote',
	'wikiversity',
	'wikinews',
];

foreach( $languages as $language ) {
	foreach( $wikis as $wiki ) {

		// wiki codename like 'itwiki'
		$wiki_codename = $language . $wiki;

		// wiki database name like 'itwiki_p'
		$wiki_database = $wiki_codename . '_p';

		// check if the database exists
		$wiki_exists = query_row( sprintf(
			"SELECT 1 FROM INFORMATION_SCHEMA.SCHEMATA WHERE SCHEMA_NAME = '%s'",
			esc_sql( $wiki_database )
		) );

		// does the database exists?
// (continuation of the per-wiki loop: process one $wiki_database)
		if( $wiki_exists ) {

			message( "Processing $wiki_database" );

			// execute the report
			generate_wmch_report_for_wiki( $wiki_codename, $wiki_database );
		} else {
			// nope, it does not exist
			// do nothing
			// FIX: route through message() like every other status line in
			// this script — the bare print() had no trailing newline, so
			// consecutive skip lines were glued together on one line.
			message( "Skip unexisting $wiki_database" );
		}
	}
}

/**
 * Print a message on standard output, prefixed with the current
 * date and time and followed by a newline.
 *
 * @param string $message
 */
function message( $message ) {
	printf( "[%s] %s\n", date( 'Y-m-d H:i:s' ), $message );
}