// Commons:Photo challenge/code/CreateVotingDuo.cs
//
// From Wikimedia Commons, the free media repository
// (wiki page chrome: "Jump to navigation Jump to search")
//Create Voting pages code for duo photos
//Author user:colin
//License: Public domain
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using HtmlAgilityPack;

namespace CreateVoting
{
    internal class Program
    {
        /// <summary>
        /// Generates the voting page wikitext for a "duo" photo challenge.
        /// Downloads the raw challenge page, pairs consecutive gallery entries, checks each
        /// pair against the challenge dates and writes one voting section per pair to
        /// "&lt;challenge&gt;.txt". Problems and statistics are written to
        /// "Errors-&lt;challenge&gt;.txt".
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        private static void Main(string[] args)
        {
            // The page is English wikitext, so date parsing and "dd MMMM yyyy" output must not
            // depend on the regional settings of the machine running the tool.
            System.Threading.Thread.CurrentThread.CurrentCulture =
                System.Globalization.CultureInfo.InvariantCulture;

            const string challenge = "2016 - December - Home appliances";
            DateTime? minCreateDate = null; //  new DateTime(2015, 9, 1); // Change for theme? 
            bool draft = false;

            string outFile = (challenge + ".txt").Replace('/', '_');

            // Will need to work on this for multi-month challenges
            string theme = challenge;

            // Fallback upload window, used when the challenge name is not "<year> - <month> - <theme>".
            DateTime minUploadDate = new DateTime(2015, 9, 1);
            DateTime maxUploadDate = new DateTime(2015, 11, 1);

            string[] parts = challenge.Split(new[] {" - "}, StringSplitOptions.RemoveEmptyEntries);
            if (parts.Length == 3)
            {
                theme = parts[2];

                // "1 December 2016" - the space before the year keeps the text unambiguous
                // instead of relying on the parser's leniency.
                string minUploadText = "1 " + parts[1] + " " + parts[0];

                DateTime parsedStart;
                if (DateTime.TryParse(minUploadText,
                                      System.Globalization.CultureInfo.InvariantCulture,
                                      System.Globalization.DateTimeStyles.None,
                                      out parsedStart))
                {
                    minUploadDate = parsedStart;
                    maxUploadDate = parsedStart.AddMonths(1);
                }
            }

            DateTime closeTime = maxUploadDate.AddDays(-1);
            DateTime voteCloseTime = maxUploadDate.AddMonths(1).AddDays(-1);

            string url = "https://commons.wikimedia.org/w/index.php?title=Commons:Photo_challenge/" +
                         challenge.Replace(' ', '_') + "&action=raw";

            using (var errorWriter = new StreamWriter("Errors-" + outFile, false, Encoding.UTF8))
            {
                List<string> wikiText = DownloadWikiFile(url, errorWriter);

                if (wikiText == null)
                {
                    errorWriter.WriteLine("No wikitext");
                    return;
                }

                // Holds the SECOND image of every complete pair; the first is reachable via Partner.
                var infos = new List<FileInfo>();

                int p = 0;                 // index of the current gallery entry (even = first of a pair)
                FileInfo previous = null;  // first image of the pair currently being assembled
                bool foundGallery = false;
                foreach (string line in wikiText)
                {
                    if (foundGallery)
                    {
                        if (line.StartsWith("<!--"))
                        {
                            continue;
                        }
                        if (line.StartsWith("</gallery>"))
                        {
                            break;
                        }

                        if (string.IsNullOrWhiteSpace(line))
                        {
                            continue;
                        }

                        // Gallery lines look like "File:Name.jpg|Caption".
                        int bar = line.IndexOf('|');
                        string fileName = (bar == -1) ? line : line.Substring(0, bar);
                        string title = (bar == -1) ? string.Empty : line.Substring(bar + 1);

                        if (fileName.ToLower().StartsWith("file:"))
                        {
                            fileName = fileName.Substring(5);
                        }
                        fileName = fileName.Replace('_', ' ');

                        if (string.IsNullOrWhiteSpace(title))
                        {
                            // No caption given: fall back to the file name without its extension.
                            int dot = fileName.LastIndexOf('.');
                            title = (dot == -1) ? fileName : fileName.Substring(0, dot);
                        }

                        // Placeholder image of the gallery, not an entry.
                        if (fileName == "W2321-ToInsertYourPicToChallengeClickBelow.svg")
                        {
                            continue;
                        }

                        FileInfo info;

                        if (fileName == "Not on Commons")
                        {
                            info = new FileInfo()
                            {
                                FileName = fileName,
                                Title = title
                            };
                        }
                        else
                        {
                            // Hard-coded metadata for a few specific files; everything else is
                            // looked up on the file description page.
                            if (fileName == "Intérieur de l'église Saint Jacques le Majeur, Montrouge, France.JPG")
                            {
                                info = new FileInfo()
                                {
                                    FileName = fileName,
                                    Title = title,
                                    Creator = "Ibex73",
                                    Uploaded = new DateTime(2016, 3, 13, 13, 3, 0),
                                    Width = 2592,
                                    Height = 1944,
                                    OwnWork = true
                                };
                            }
                            else if (fileName == "Gare de l'est intérieur 2011.jpg")
                            {
                                info = new FileInfo()
                                {
                                    FileName = fileName,
                                    Title = title,
                                    Creator = "Palamède",
                                    Uploaded = new DateTime(2016, 3, 5, 8, 28, 0),
                                    Width = 4288,
                                    Height = 2848,
                                    OwnWork = true
                                };
                            }
                            else if (fileName == "Paris Gare de l'est extérieur 2011.jpg")
                            {
                                info = new FileInfo()
                                {
                                    FileName = fileName,
                                    Title = title,
                                    Creator = "Palamède",
                                    Uploaded = new DateTime(2016, 3, 5, 8, 16, 0),
                                    Width = 4288,
                                    Height = 2848,
                                    OwnWork = true
                                };
                            }
                            else
                            {
                                info = GetFileInfo(fileName, title, errorWriter);
                            }
                        }

                        if (info == null)
                        {
                            errorWriter.WriteLine("BAD: " + fileName);
                        }
                        else
                        {
                            Console.WriteLine(info.FileName);
                        }

                        if (p % 2 == 0)
                        {
                            // First half of a pair. A failed lookup leaves the slot empty so the
                            // second half is never matched with a stale entry from an earlier pair.
                            previous = info;
                        }
                        else
                        {
                            if (info != null && previous != null)
                            {
                                previous.Partner = info;
                                info.Partner = previous;
                                infos.Add(info);
                            }
                            else if (info != null)
                            {
                                errorWriter.WriteLine("Dropped " + info.FileName + " because its partner could not be read");
                            }
                            else if (previous != null)
                            {
                                errorWriter.WriteLine("Dropped " + previous.FileName + " because its partner could not be read");
                            }

                            previous = null;
                        }

                        ++p;
                    }
                    else
                    {
                        // The entries live in the gallery that uses 250px thumbnails.
                        if (line.StartsWith("<gallery ") && line.Contains("250px"))
                        {
                            foundGallery = true;
                        }
                    }
                }

                if (previous != null)
                {
                    // Odd number of entries: the last image has no partner.
                    errorWriter.WriteLine("Unpaired previous: " + previous.Title);
                }
                
                using (var writer = new StreamWriter(outFile, false, Encoding.UTF8))
                {
                    /*
                    writer.WriteLine("<gallery mode=\"packed-hover\" widths=250px heights=250px perrow=2 caption=\"Entries\">");
                    foreach (FileInfo pr in infos.OrderBy(im => im.Uploaded))
                    {
                        writer.WriteLine(pr.Partner.FileName + "|" + pr.Partner.Title);
                        writer.WriteLine(pr.FileName + "|" + pr.Title);
                    }
                    writer.WriteLine("</gallery>");
                    */

                    writer.WriteLine("__NOTOC__");
                    if (draft)
                    {
                        writer.WriteLine("{{Discussion top|THIS IS A DRAFT. DO NOT VOTE YET!}}");
                    }
                    writer.WriteLine("");
                    writer.WriteLine(
                        "'''Voting will end at midnight UTC on {0:dd MMMM yyyy}'''. The theme was '''{1}'''.",
                        voteCloseTime, theme);
                    writer.WriteLine("");
                    writer.WriteLine(
                        "{{Commons:Photo challenge/Voting header/{{SuperFallback|Commons:Photo challenge/Voting header}}}}");
                    writer.WriteLine("");
                    writer.WriteLine("===Sample===");
                    writer.WriteLine(
                        "[[File:Sample-image.svg|none|thumb|300x300px|Sample caption. [{{filepath:Sample-image.svg}}<br>''(Full size image)'']]]");
                    writer.WriteLine(
                        "*{{3/3*}} Great font! -- [[User:Colin|Colin]] ([[User talk:Colin|<span class=\"signature-talk\">talk</span>]]) 18:22, 2 January 2014 (UTC)");
                    writer.WriteLine("*{{2/3*}} -- '''Another person''' 18:20, 2 January 2014 (UTC)");
                    writer.WriteLine("*{{1/3*}} Cool. Love it. -- '''Yet another person''' 18:32, 2 January 2014 (UTC)");
                    writer.WriteLine("*{{0/3*}} Beautiful -- '''Someone else''' 13:30 3 January 2014 (UTC)");
                    writer.WriteLine("");

                    // Thumbnail height used when an image has no known dimensions:
                    // two square images sharing the 240000 px² budget are about 346 px high.
                    const int fallbackThumbHeight = 346;

                    int i = 1;

                    // Pairs are listed in order of the earlier upload of the two images.
                    foreach (FileInfo file in infos.OrderBy(im => (im.Uploaded < im.Partner.Uploaded ? im.Uploaded : im.Partner.Uploaded)))
                    {
                        // Check BOTH images so every problem is logged, then drop the pair if
                        // either one is invalid.
                        // NOTE(review): the original used '&' (skip only when both fail), which
                        // left pairs on the page next to their own "REMOVED" comment - confirm
                        // that dropping on a single failure is the intended rule.
                        bool fileRejected = CheckForErrors(minCreateDate, minUploadDate, maxUploadDate, closeTime, file, errorWriter, writer);
                        bool partnerRejected = CheckForErrors(minCreateDate, minUploadDate, maxUploadDate, closeTime, file.Partner, errorWriter, writer);
                        if (fileRejected || partnerRejected)
                        {
                            continue;
                        }

                        int imageHeight = fallbackThumbHeight;
                        if (file.Width > 0 && file.Height > 0 && file.Partner.Width > 0 && file.Partner.Height > 0)
                        {
                            // Scale the pair, laid side by side at equal height, to a combined
                            // area of roughly 240000 px². The area is computed in double so
                            // very large images cannot overflow int.
                            double ratio = (((double)file.Width) / file.Height);
                            double partnerRatio = (((double)file.Partner.Width) / file.Partner.Height);
                            int partnerWidth = (int)(file.Height * partnerRatio);
                            int combinedWidth = file.Width + partnerWidth;

                            double imageNewWidth = combinedWidth * Math.Sqrt(240000.0 / ((double)combinedWidth * file.Height));
                            int imageWidth = (int)((imageNewWidth * file.Width) / combinedWidth);
                            imageHeight = (int)(imageWidth / ratio);
                        }
                        else
                        {
                            errorWriter.WriteLine("{0} / {1} have unknown dimensions; using default thumbnail height",
                                file.Partner.FileName, file.FileName);
                        }
                        
                        writer.WriteLine("==={0}. {1}===", i, file.FileWithoutExt);
                        writer.WriteLine("{|");
                        writer.WriteLine("|- valign=\"top\"");
                        writer.WriteLine(
                            "|width=\"100pt\" |[[File:{0}|none|thumb|x{1}px|{2} [{{{{filepath:{0}}}}}<br>''(Full size image)'']]]",
                            file.Partner.FileName, imageHeight, file.Partner.Title);
                        writer.WriteLine(
                            "|width=\"100pt\" |[[File:{0}|none|thumb|x{1}px|{2} [{{{{filepath:{0}}}}}<br>''(Full size image)'']]]",
                            file.FileName, imageHeight, file.Title);
                        writer.WriteLine("|}");
                        writer.WriteLine(
                            "<!-- '''Creator:''' [[User:{0}|{0}]] -->{1}'''Uploaded:''' {2:dd MMMM yyyy} '''Size''': {3} × {4} ({5:F1} MP)",
                            file.Partner.Creator,
                            file.Partner.Created.HasValue
                                ? string.Format("'''Taken:''' {0:dd MMMM yyyy} ", file.Partner.Created)
                                : string.Empty,
                            file.Partner.Uploaded,
                            file.Partner.Width,
                            file.Partner.Height,
                            file.Partner.Megapixels);
                        writer.WriteLine(
                            "<!-- '''Creator:''' [[User:{0}|{0}]] -->{1}'''Uploaded:''' {2:dd MMMM yyyy} '''Size''': {3} × {4} ({5:F1} MP){{{{Collapse top|Current votes – please choose your own winners before looking}}}}",
                            file.Creator,
                            file.Created.HasValue
                                ? string.Format("'''Taken:''' {0:dd MMMM yyyy} ", file.Created)
                                : string.Empty,
                            file.Uploaded,
                            file.Width,
                            file.Height,
                            file.Megapixels);
                        writer.WriteLine("<!-- Vote below this line -->");
                        writer.WriteLine("<!-- Vote above this line -->");
                        writer.WriteLine("{{Collapse bottom}}");
                        writer.WriteLine("");

                        ++i;
                    }

                    // Statistics for the error log. They are based on the second image of each pair.
                    var users = infos.GroupBy(info => info.Creator).Select(gr => new { Name = gr.Key, Nbr = gr.Count() }).OrderByDescending(u => u.Nbr);

                    var usersMaxed = infos.GroupBy(info => info.Creator).Select(gr => new { Name = gr.Key, Nbr = Math.Min(4, gr.Count()) }).OrderByDescending(u => u.Nbr);

                    int nbrEntriesIfCapped = usersMaxed.Sum(gr => gr.Nbr);

                    errorWriter.WriteLine(users.Count() + " users");
                    foreach (var user in users)
                    {
                        errorWriter.WriteLine(user.Name + " " + user.Nbr);
                    }

                    int nbrTakenDuringChallenge = infos.Count(info => info.Created >= minUploadDate && info.Created <= maxUploadDate);

                    errorWriter.WriteLine(infos.Count() + " photos and " + nbrTakenDuringChallenge + " taken during challenge. " + nbrEntriesIfCapped + " if capped");

                    if (draft)
                    {
                        writer.WriteLine("{{Discussion bottom}}");
                    }
                }
            }

            Console.WriteLine("Press a key");
            Console.ReadKey();
        }

        /// <summary>
        /// Checks one image against the challenge date rules. For the first rule that is
        /// violated, a line is written to the error log and a "REMOVED" HTML comment
        /// (followed by an empty line) is written to the voting page.
        /// </summary>
        /// <param name="minCreateDate">Earliest allowed creation date, or null to skip that rule.</param>
        /// <param name="minUploadDate">First moment at which uploads are accepted.</param>
        /// <param name="maxUploadDate">Uploads at or after this moment are too late.</param>
        /// <param name="closeTime">Closing date shown in the "uploaded late" comment.</param>
        /// <param name="file">The image to check.</param>
        /// <param name="errorWriter">Receives the error log line.</param>
        /// <param name="writer">Receives the "REMOVED" comment for the voting page.</param>
        /// <returns>true when a rule was violated; false when the image passes every check.</returns>
        private static bool CheckForErrors(DateTime? minCreateDate, DateTime minUploadDate, DateTime maxUploadDate, DateTime closeTime, FileInfo file, StreamWriter errorWriter, StreamWriter writer)
        {
            string logFormat;     // format of the error log line: {0} = url, {1} = creator
            string removedFormat; // format of the wikitext comment: {2} = offending date, {3} = limit
            object offendingDate;
            object limitDate;

            // Rules are tested in a fixed order; only the first violation is reported.
            if (minCreateDate.HasValue && (!file.Created.HasValue || file.Created < minCreateDate))
            {
                logFormat = "{0} created early by user {1}";
                removedFormat =
                    "<!-- REMOVED: {0} by [[User:{1}|{1}]] was taken {2:dd MMMM yyyy} too early ({3:dd MMMM yyyy}) -->";
                offendingDate = file.Created;
                limitDate = minCreateDate;
            }
            else if (file.Uploaded < minUploadDate)
            {
                logFormat = "{0} uploaded early by user {1}";
                removedFormat =
                    "<!-- REMOVED: {0} by [[User:{1}|{1}]] was uploaded {2:dd MMMM yyyy} before the challenge opened ({3:dd MMMM yyyy}) -->";
                offendingDate = file.Uploaded;
                limitDate = minUploadDate;
            }
            else if (file.Uploaded >= maxUploadDate)
            {
                logFormat = "{0} uploaded late by user {1}";
                removedFormat =
                    "<!-- REMOVED: {0} by [[User:{1}|{1}]] was uploaded {2:dd MMMM yyyy} after the challenge closed ({3:dd MMMM yyyy}) -->";
                offendingDate = file.Uploaded;
                limitDate = closeTime;
            }
            else
            {
                // A missing upload date compares false against both limits and ends up here.
                return false;
            }

            errorWriter.WriteLine(logFormat, file.Url, file.Creator);
            writer.WriteLine(removedFormat, file.FileName, file.Creator, offendingDate, limitDate);
            writer.WriteLine("");
            return true;
        }

        /// <summary>
        /// Downloads the Commons file description page for <paramref name="fileName"/> and
        /// extracts upload date, uploader, pixel dimensions, file size, creation date and the
        /// "own work" marker from its HTML. Every missing item is reported to
        /// <paramref name="errorWriter"/> but does not abort the lookup.
        /// </summary>
        /// <param name="fileName">File name without the "File:" prefix.</param>
        /// <param name="title">Caption to store on the returned object.</param>
        /// <param name="errorWriter">Receives one line per problem found.</param>
        /// <returns>The collected information, or null when the page could not be requested.</returns>
        private static FileInfo GetFileInfo(string fileName, string title, StreamWriter errorWriter)
        {
            var info = new FileInfo {FileName = fileName, Title = title};

            // The page is requested in English, so parse its dates and numbers independently of
            // the machine's regional settings.
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            var request = (HttpWebRequest)WebRequest.Create(info.Url);

            HttpWebResponse response;
            try
            {
                response = (HttpWebResponse) request.GetResponse();
            }
            catch (Exception e)
            {
                errorWriter.WriteLine(info.Url + " gave " + e.Message);
                return null;
            }

            // Dispose the response as well as its stream so the connection is released.
            using (response)
            using (Stream receiveStream = response.GetResponseStream())
            {
                var doc = new HtmlDocument();
                doc.Load(receiveStream, Encoding.UTF8);

                HtmlNode docNode = doc.DocumentNode;

                // --- Upload date: timestamp link of the last row in the file history table ---
                HtmlNodeCollection uploadedNodes =
                    docNode.SelectNodes(
                        "//table[@class='wikitable filehistory']/tr/td[contains(@style, 'white-space: nowrap;')]/a[contains(@href, 'upload.wikimedia.org')]");
                if (uploadedNodes != null)
                {
                    HtmlNode firstupload = uploadedNodes.Last();
                    // 12:53, 8 March 2014
                    string date = firstupload.InnerText;
                    int comma = date.IndexOf(',');
                    if (comma != -1)
                    {
                        // Reorder to "8 March 2014 12:53" before parsing.
                        date = date.Substring(comma + 1).Trim() + " " + date.Substring(0, comma).Trim();
                        DateTime uploaded;
                        if (DateTime.TryParse(date, invariant, System.Globalization.DateTimeStyles.None, out uploaded))
                        {
                            info.Uploaded = uploaded;
                        }
                    }
                }

                if (!info.Uploaded.HasValue)
                {
                    errorWriter.WriteLine(info.Url + " had no uploaded date time.");
                }

                ////HtmlNodeCollection uploadNodes = docNode.SelectNodes("//table[@class='wikitable filehistory']/tr/td/a[contains(@href, '/wiki/User:') or contains(@href, '/w/index.php?title=User:')]");

                // --- Creator: user link of the last row in the file history table ---
                HtmlNodeCollection uploadNodes =
                    docNode.SelectNodes(
                        "//table[@class='wikitable filehistory']/tr/td/a[contains(@class, 'mw-userlink')]");

                if (uploadNodes != null)
                {
                    HtmlNode firstContrib = uploadNodes.Last();
                    info.Creator = firstContrib.InnerText;
                }

                if (string.IsNullOrEmpty(info.Creator))
                {
                    errorWriter.WriteLine(info.Url + " had no creator.");
                }

                // --- Dimensions and file size ---
                // SelectNodes returns null (not an empty collection) when nothing matches.
                HtmlNodeCollection finfoNodes = docNode.SelectNodes("//span[@class='fileInfo']");
                HtmlNode finfoNode = finfoNodes == null ? null : finfoNodes.FirstOrDefault();

                if (finfoNode != null)
                {
                    //(900 × 657 pixels, file size: 117 KB, MIME type: image/jpeg)
                    string inner = finfoNode.InnerText;
                    string[] words = inner.Length > 1 ? inner.Substring(1).Split(new[] {' '}) : new string[0];

                    int width;
                    int height;
                    if (words.Length >= 3 &&
                        int.TryParse(words[0].Replace(",", ""), System.Globalization.NumberStyles.Integer, invariant, out width) &&
                        int.TryParse(words[2].Replace(",", ""), System.Globalization.NumberStyles.Integer, invariant, out height))
                    {
                        info.Width = width;
                        info.Height = height;
                    }

                    double size;
                    if (words.Length >= 8 &&
                        double.TryParse(words[6].Replace(",", ""), System.Globalization.NumberStyles.Float, invariant, out size))
                    {
                        string sizeDim = words[7].Length >= 2 ? words[7].Substring(0, 2) : words[7];

                        // Any unit other than KB is taken as MB, as before.
                        info.FileSizeMB = (sizeDim == "KB") ? size / 1024.0 : size;
                    }
                }

                if (info.Width == 0)
                {
                    errorWriter.WriteLine(info.Url + " had no file info.");
                }

                // --- Creation date ---
                // <time class="dtstart" datetime="2015-04-12 09:17:25">12 April 2015, 09:17:25</time>
                // <tr class="exif-datetimeoriginal">
                HtmlNodeCollection startNodes = docNode.SelectNodes("//time[@class='dtstart']");

                // Some images have object date before photo date
                HtmlNode startNode = startNodes == null ? null : startNodes.LastOrDefault();

                if (startNode != null)
                {
                    string timeStamp = startNode.GetAttributeValue("datetime", string.Empty);

                    DateTime created;
                    if (DateTime.TryParse(timeStamp, invariant, System.Globalization.DateTimeStyles.None, out created))
                    {
                        info.Created = created;
                    }
                }
                else
                {
                    // No <time> element: fall back to the EXIF "date/time original" row.
                    startNodes = docNode.SelectNodes("//tr[@class='exif-datetimeoriginal']/td");

                    startNode = startNodes == null ? null : startNodes.FirstOrDefault();

                    if (startNode != null)
                    {
                        DateTime created;
                        if (DateTime.TryParse(startNode.InnerText, invariant, System.Globalization.DateTimeStyles.None, out created))
                        {
                            info.Created = created;
                        }
                    }
                }

                if (!info.Created.HasValue)
                {
                    errorWriter.WriteLine(info.Url + " had no created date.");
                }

                // --- Own work marker ---
                var ownWorkNodes = docNode.SelectNodes("//span[@class='int-own-work']");

                info.OwnWork = (ownWorkNodes != null);

                if (!info.OwnWork)
                {
                    errorWriter.WriteLine(info.Url + " may not be own work.");
                }
            }

            return info;
        }

        /// <summary>
        /// Downloads a text resource and returns it split into lines.
        /// </summary>
        /// <param name="url">Address to request.</param>
        /// <param name="errorWriter">Receives a line describing the failure, if any.</param>
        /// <returns>
        /// The lines of the response body, or null when the request failed, the status was not
        /// OK/Moved/Redirect, or the content type does not start with "text".
        /// </returns>
        private static List<string> DownloadWikiFile(string url, StreamWriter errorWriter)
        {
            var request = (HttpWebRequest) WebRequest.Create(url);
            request.CookieContainer = new System.Net.CookieContainer();
            HttpWebResponse response;
            try
            {
                response = (HttpWebResponse) request.GetResponse();
            }
            catch (Exception e)
            {
                errorWriter.WriteLine(url + " gave " + e.Message);
                return null;
            }

            // Dispose the response on every path so the underlying connection is released.
            using (response)
            {
                bool statusAccepted = response.StatusCode == HttpStatusCode.OK ||
                                      response.StatusCode == HttpStatusCode.Moved ||
                                      response.StatusCode == HttpStatusCode.Redirect;
                string contentType = response.ContentType ?? string.Empty;

                if (!statusAccepted || !contentType.StartsWith("text", StringComparison.OrdinalIgnoreCase))
                {
                    errorWriter.WriteLine(url + " gave " + response.StatusCode + " " + response.StatusDescription);
                    return null;
                }

                var result = new List<string>();

                // if the remote file was found, download it
                using (Stream inputStream = response.GetResponseStream())
                using (TextReader reader = new StreamReader(inputStream))
                {
                    string line;
                    while ((line = reader.ReadLine()) != null)
                    {
                        result.Add(line);
                    }
                }

                return result;
            }
        }

        /// <summary>
        /// Metadata of one challenge image, plus a link to the image it is paired with.
        /// </summary>
        public class FileInfo
        {
            /// <summary>File name without the "File:" prefix, with spaces rather than underscores.</summary>
            public string FileName { get; set; }

            /// <summary>Caption shown under the image.</summary>
            public string Title { get; set; }

            /// <summary>User name of the uploader.</summary>
            public string Creator { get; set; }

            /// <summary>When the photo was taken, if known.</summary>
            public DateTime? Created { get; set; }

            /// <summary>When the file was uploaded, if known.</summary>
            public DateTime? Uploaded { get; set; }

            /// <summary>Width in pixels; 0 when unknown.</summary>
            public int Width { get; set; }

            /// <summary>Height in pixels; 0 when unknown.</summary>
            public int Height { get; set; }

            /// <summary>True when the description page carries the "own work" marker.</summary>
            public bool OwnWork { get; set; }

            /// <summary>The other image of the duo.</summary>
            public FileInfo Partner { get; set; }

            /// <summary>
            /// Width, in pixels, at which the image covers an area of about
            /// <paramref name="sizePx"/> square pixels while keeping its aspect ratio.
            /// </summary>
            /// <param name="sizePx">Target area in square pixels.</param>
            /// <returns>The scaled width, or 0 when the dimensions are unknown.</returns>
            public int WidthForSize(int sizePx)
            {
                if (Width <= 0 || Height <= 0)
                {
                    // Avoid dividing by zero for images without dimensions.
                    return 0;
                }

                // The area is computed in double: Width * Height can exceed int.MaxValue.
                return (int)(Width * Math.Sqrt(((double)sizePx) / ((double)Width * Height)));
            }

            /// <summary>
            /// Image size in megapixels, truncated (not rounded) to one decimal, e.g. "12.2".
            /// </summary>
            public string Megapixels
            {
                get
                {
                    // The area is computed in double: Width * Height can exceed int.MaxValue.
                    double mp = (((double)Width * Height) / 1000.0 / 1000.0);
                    double mpRound = Math.Floor(mp * 10.0) / 10.0;

                    // Always use '.' as the decimal separator; the value goes into wikitext.
                    return mpRound.ToString("F1", System.Globalization.CultureInfo.InvariantCulture);
                }
            }

            /// <summary>File name with its last extension removed.</summary>
            public string FileWithoutExt
            {
                get
                {
                    int dot = FileName.LastIndexOf('.');
                    return (dot == -1) ? FileName : FileName.Substring(0, dot);
                }
            }

            /// <summary>
            /// Ratio of the longer side to the shorter side, truncated to one decimal.
            /// Returns 0 when the dimensions are unknown.
            /// </summary>
            public double AspectRatio
            {
                get
                {
                    if (Width <= 0 || Height <= 0)
                    {
                        // Avoid NaN/Infinity for images without dimensions.
                        return 0.0;
                    }

                    double ratio = Width > Height ? ((double)Width / Height) : ((double)Height / Width);
                    return Math.Floor(ratio * 10.0) / 10.0;
                }
            }

            /// <summary>Aspect ratio as text: "1.5:1" for landscape, "1:1.5" otherwise.</summary>
            public string AspectRatioText
            {
                get
                {
                    string ratioText = AspectRatio.ToString("F1", System.Globalization.CultureInfo.InvariantCulture);
                    return Width > Height
                        ? (ratioText + ":1")
                        : ("1:" + ratioText);
                }
            }

            /// <summary>
            /// Address of the file description page on Commons. Spaces become underscores and
            /// '?' is percent-encoded so it is not read as the start of a query string.
            /// </summary>
            public string Url
            {
                get
                {
                    return "https://commons.wikimedia.org/wiki/File:" + FileName.Replace(' ', '_').Replace("?", "%3F");
                }
            }

            /// <summary>File size in megabytes.</summary>
            public double FileSizeMB { get; set; }
        }
    }
}