$ pbzip2 -dc /public/dumps/public/enwiki/20210220/enwiki-20210220-pages-articles-multistream.xml.bz2 | grep pmc | grep -Eo "(url= *http[^}]+pmc *= *[0-9]|pmc *= *[0-9][^}]+url= *http)" | grep -Eo "url= *http[^|]+" | cut -f3 -d/ |sort| uniq -c | sort -nr| head -n 50
   1181 www.ncbi.nlm.nih.gov
    915 pubmed.ncbi.nlm.nih.gov
    840 www.pnas.org
    621 www.nature.com
    492 journals.plos.org
    465 doi.org
    407 zookeys.pensoft.net
    353 academic.oup.com
    299 www.cell.com
    299 www.bmj.com
    285 dx.doi.org
    268 www.sciencedirect.com
    179 www.thelancet.com
    174 www.parasite-journal.org
    166 www.genetics.org
    166 onlinelibrary.wiley.com
    155 royalsocietypublishing.org
    151 www.plosone.org
    145 www.mdpi.com
    127 www.pensoft.net
    123 dx.plos.org
    111 www.biomedcentral.com
    111 web.archive.org
    109 link.springer.com
    101 www.frontiersin.org
    100 books.google.com
     97 nar.oxfordjournals.org
     93 peerj.com
     85 linkinghub.elsevier.com
     84 www.cdc.gov
     83 aem.asm.org
     72 doi.wiley.com
     72 advances.sciencemag.org
     71 europepmc.org
     67 wwwnc.cdc.gov
     63 www.cmaj.ca
     59 jb.asm.org
     58 www.who.int
     58 www.researchgate.net
     56 journals.sagepub.com
     51 jvi.asm.org
     50 ajph.aphapublications.org
     49 www.plantphysiol.org
     49 bmjopen.bmj.com
     48 aob.oxfordjournals.org
     47 www.jbc.org
     46 archive.org
     45 www.cambridge.org
     45 science.sciencemag.org
     42 jnnp.bmj.com