diff --git a/images/singleuser/Dockerfile b/images/singleuser/Dockerfile index c99fad3..fab0835 100644 --- a/images/singleuser/Dockerfile +++ b/images/singleuser/Dockerfile @@ -1,266 +1,264 @@ FROM ubuntu:22.04 -ENV PYWIKIBOT_VERSION=7.7.2 +ENV PYWIKIBOT_VERSION=8.0.0 ENV EDITOR=/bin/nano ENV PYWIKIBOT_DIR=/srv/paws ENV DEBIAN_FRONTEND=noninteractive ## Begin minimal setup ## # Use bash as default shell, rather than sh ENV SHELL /bin/bash # Set up user ENV NB_USER tools.paws ENV NB_UID 52771 ENV HOME /home/paws RUN adduser --disabled-password \ --gecos "Default user" \ --uid ${NB_UID} \ --home ${HOME} \ --force-badname \ ${NB_USER} WORKDIR ${HOME} RUN apt-get update && \ apt-get install --yes \ python3-venv \ pip \ python3 ENV LC_ALL en_US.UTF-8 ENV LANG en_US.UTF-8 ENV LANGUAGE en_US.UTF-8 # Create venv directory, and let users install into it ENV VENV_DIR /srv/paws RUN install -d -o ${NB_USER} -g ${NB_USER} ${VENV_DIR} ENV PATH=/srv/paws/pwb:/srv/paws/bin:/srv/paws:$PATH USER ${NB_USER} RUN python3 -m venv /srv/paws RUN pip --no-cache-dir install -U pip setuptools wheel # Install base notebook packages RUN pip install --prefix=/srv/paws --no-cache-dir \ jupyterhub==3.0.0 \ notebook==6.4.12 \ jupyterlab==3.4.8 ## End minimal setup ## USER root # Setup nodesource, because node on Ubuntu is far too old to be useful ADD node/nodesource.gpg /etc/apt/trusted.gpg.d/nodesource.gpg ADD node/nodesource.list /etc/apt/sources.list.d/nodesource.list # Base building utilities that'll always be required, probably RUN apt-get update && \ apt-get install --yes \ git \ locales \ pkg-config \ build-essential \ gcc \ apt-transport-https RUN apt-get update --yes && \ apt-get install --yes \ python3-dev \ openjdk-11-jdk \ nodejs # Utilities RUN apt-get install --yes \ curl \ wget \ less \ dnsutils \ emacs \ links \ nano \ vim \ lsof \ mariadb-client # pyaudio RUN apt-get install --yes \ portaudio19-dev RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && \ locale-gen ## Install R ## # Use newer version of R # Binary packages from packagemanager.rstudio.com work against this. # Base R from Focal is only 3.6. ADD r/cran.gpg /etc/apt/trusted.gpg.d/cran.gpg ADD r/cran.list /etc/apt/sources.list.d/cran.list # Install languages needed and their core dev packages RUN apt-get update --yes && \ apt-get install --yes \ r-recommended \ r-base-dev \ r-cran-littler \ git \ curl \ gdebi \ # For R's mysql libmariadb-dev \ # For R's curl libcurl4-openssl-dev \ # for ipython kernels libzmq3-dev \ # For R's devtools libssl-dev # Install RStudio # give access to libssl1.1 for rstudio-server RUN echo "deb http://archive.ubuntu.com/ubuntu/ focal main restricted" > /etc/apt/sources.list.d/focal.list RUN apt-get update ENV RSTUDIO_SERVER_URL https://download2.rstudio.org/server/bionic/amd64/rstudio-server-2022.02.3-492-amd64.deb RUN curl --silent --location --fail ${RSTUDIO_SERVER_URL} > /tmp/rstudio-server.deb RUN gdebi -n /tmp/rstudio-server.deb && rm /tmp/rstudio-server.deb # remove focal repo RUN rm /etc/apt/sources.list.d/focal.list && apt-get update # Create user owned R libs dir # This lets users temporarily install packages ENV R_LIBS_USER /srv/r RUN install -d -o ${NB_USER} -g ${NB_USER} ${R_LIBS_USER} # R_LIBS_USER is set by default in /etc/R/Renviron, which RStudio loads. # We uncomment the default, and set what we wanna - so it picks up # the packages we install. Without this, RStudio doesn't see the packages # that R does. # Stolen from https://github.com/jupyterhub/repo2docker/blob/6a07a48b2df48168685bb0f993d2a12bd86e23bf/repo2docker/buildpacks/r.py RUN sed -i -e '/^R_LIBS_USER=/s/^/#/' /etc/R/Renviron && \ echo "R_LIBS_USER=${R_LIBS_USER}" >> /etc/R/Renviron USER ${NB_USER} RUN pip install --no-cache-dir \ jupyter-server-proxy \ rpy2 \ git+https://github.com/toolforge/jupyter-rsession-proxy.git@57d89b4 # Set CRAN mirror to rspm before we install anything COPY r/Rprofile.site /usr/lib/R/etc/Rprofile.site # RStudio needs its own config COPY r/rsession.conf /etc/rstudio/rsession.conf # Install the R Kernel RUN r -e "install.packages('IRkernel', version='1.3')" && \ r -e "IRkernel::installspec(prefix='${VENV_DIR}')" && \ rm -rf /tmp/downloaded_packages ## Done installing R USER root ## Setup OpenRefine ENV OPENREFINE_DIR /srv/openrefine ENV PATH=$PATH:$OPENREFINE_DIR RUN mkdir -p ${OPENREFINE_DIR} && cd ${OPENREFINE_DIR} && \ curl -L 'https://oss.sonatype.org/service/local/artifact/maven/content?r=releases&g=org.openrefine&a=openrefine&v=3.6.2&c=linux&p=tar.gz' | tar xzf - --strip=1 USER ${NB_USER} ENV REFINE_DIR /home/paws RUN pip install --no-cache-dir \ git+https://github.com/innovationOUtside/nb_serverproxy_openrefine@f10677f15cab1f1a0f6a4e80ee65e3598a041fae ## Done setting up OpenRefine USER root # Machine-learning type stuff RUN apt-get update && \ apt-get install --yes \ # For scipy & friends libblas-dev \ liblapack-dev \ libquadmath0 \ gfortran \ # for lxml libxml2-dev \ libxslt1-dev \ # for matplotlib libfreetype6-dev \ libpng-dev \ # for ipython kernels libzmq3-dev \ libreadline-dev \ # For PDFs and stuff pandoc \ texlive-xetex ## Install Julia # Install Julia directories and depot path ENV PATH=$PATH:/srv/julia/bin ENV JULIA_DEPOT_PATH /srv/julia-depot/ RUN install -d -o ${NB_USER} -g ${NB_USER} /srv/julia RUN install -d -o ${NB_USER} -g ${NB_USER} ${JULIA_DEPOT_PATH} USER ${NB_USER} # install julia and julia kernel COPY install-julia /tmp/install-julia RUN /tmp/install-julia ## Done Installing Julia RUN pip install --no-cache-dir \ retrolab \ jupyterlab-link-share>=0.2.4 \ nbgitpuller \ voila \ bash_kernel # Install the bash kernel RUN python -m bash_kernel.install --sys-prefix # Install mass amount of python libraries! COPY --chown=tools.paws:tools.paws requirements.txt /tmp/requirements.txt RUN pip --no-cache-dir install -r /tmp/requirements.txt # Install pywikibot -RUN git clone --branch $PYWIKIBOT_VERSION --recursive https://gerrit.wikimedia.org/r/pywikibot/core.git /srv/paws/pwb +RUN pip install pywikibot==$PYWIKIBOT_VERSION COPY --chown=tools.paws:tools.paws user-config.py /srv/paws/ COPY --chown=tools.paws:tools.paws user-fixes.py /srv/paws/ -COPY install-pwb /usr/local/bin/ -RUN /usr/local/bin/install-pwb COPY install-extensions /usr/local/bin/ RUN /usr/local/bin/install-extensions COPY banner /etc/bash.bashrc # use custom css to hide clusters tab COPY --chown=tools.paws:tools.paws hide_clusters_tab.css /home/paws/.jupyter/custom/custom.css ## Install SPARQL USER root RUN apt-get update && \ apt-get install --yes \ # For sparql kernel graphviz USER ${NB_USER} RUN pip install --no-cache-dir sparqlkernel RUN python3 -m jupyter sparqlkernel install --sys-prefix ## End SPARQL Install EXPOSE 8888 diff --git a/images/singleuser/install-pwb b/images/singleuser/install-pwb deleted file mode 100755 index f89eda1..0000000 --- a/images/singleuser/install-pwb +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash -set -e - -# Remove 'Pillow' from requirements.txt -sed -e '/Pillow/d' /srv/paws/pwb/requirements.txt > /tmp/requirements.txt - -/srv/paws/bin/pip install --no-cache-dir -r /tmp/requirements.txt -/srv/paws/bin/pip install --no-cache-dir -r /srv/paws/pwb/dev-requirements.txt -/srv/paws/bin/pip install --editable /srv/paws/pwb - -# cleanup -rm /tmp/requirements.txt diff --git a/paws/values.yaml b/paws/values.yaml index 01fd153..58f6b70 100644 --- a/paws/values.yaml +++ b/paws/values.yaml @@ -1,323 +1,323 @@ --- # pawsPublicEnabled enables the anonymous viewing service for notebooks pawsPublicEnabled: true pawspublic: nbserve: image: name: quay.io/wikimedia-paws-prod/nbserve tag: pr-227 # nbserve tag managed by github actions # pawspublic.nbserve.image.template safely defines image:tag name in yaml template: "{{ .Values.pawspublic.nbserve.image.name}}:{{.Values.pawspublic.nbserve.image.tag }}" replicas: 1 requests: memory: "20Mi" renderer: image: name: quay.io/wikimedia-paws-prod/renderer tag: pr-168 # renderer tag managed by github actions # pawspublic.nbserve.image.template safely defines image:tag name in yaml template: "{{ .Values.pawspublic.renderer.image.name}}:{{.Values.pawspublic.renderer.image.tag }}" requests: cpu: "10m" # give a token amount for local dev memory: "10Mi" ingress: host: public.hub.paws.local legacyHost: paws-public.wmflabs.org paws: # frontPageEnabled switches the URL path of / to a redirect to paws.ingress.frontRedirect frontPageEnabled: true ingress: legacyHost: paws.wmflabs.org # paws.ingress.frontHost should be the domain the URL path of / uses to redirect to docs frontHost: paws.wmcloud.org # paws.ingress.frontRedirect should be the destination for URL path of / at paws.ingress.frontHost frontRedirect: https://wikitech.wikimedia.org/wiki/PAWS jupyterhub: prePuller: containerSecurityContext: runAsUser: 52771 runAsGroup: 52771 hook: containerSecurityContext: runAsUser: 52771 runAsGroup: 52771 pause: containerSecurityContext: runAsUser: 52771 runAsGroup: 52771 proxy: chp: resources: requests: memory: "200Mi" cpu: .2 containerSecurityContext: runAsUser: 52771 runAsGroup: 52771 # jupyterhub.proxy.secretToken is a valid dummy value for development secretToken: "23f542cc4b1af000e68088f1acc7ca8275a67cf496bae15ead6a79b8c6702597" service: nodePorts: http: 32611 type: NodePort cull: timeout: 86400 hub: config: # updated auth object for chart version 0.11.0+ this is the local dev values MWOAuthenticator: client_id: fea321f1b6b5aed9fa83d5362839cd3d client_secret: 6b17e5b87ae5ee893f5d4ba8b0e2377c6c0c3fcc mw_index_url: https://meta.wikimedia.org/w/index.php Authenticator: admin_users: - BDavis_(WMF) - ABorrero_(WMF) - NSkaggs_(WMF) - Andrewbogott - Chicocvenancio - VRook_(WMF) JupyterHub: authenticator_class: mediawiki db: # jupyterhub.hub.db values are overridden in Cloud VPS url: sqlite:// type: sqlite-pvc upgrade: true extraVolumes: - name: homes hostPath: path: /srv/misc/shared/paws/project - name: dumps hostPath: path: /mnt/public/dumps # Without this, dumps becomes inaccessible and can hang the host - name: dumps-src1 hostPath: path: /mnt/nfs/dumps-clouddumps1001.wikimedia.org type: DirectoryOrCreate - name: dumps-src2 hostPath: path: /mnt/nfs/dumps-clouddumps1002.wikimedia.org type: DirectoryOrCreate extraVolumeMounts: - name: homes mountPath: /data/project - name: dumps mountPath: /public/dumps readOnly: true - name: dumps-src1 mountPath: /mnt/nfs/dumps-clouddumps1001.wikimedia.org readOnly: true - name: dumps-src2 mountPath: /mnt/nfs/dumps-clouddumps1002.wikimedia.org readOnly: true extraConfig: fixLabels: | def fix_labels(spawner, pod): del pod.metadata.labels['hub.jupyter.org/username'] return pod c.KubeSpawner.modify_pod_hook = fix_labels 00-myConfig: | localdev = True 10-myConfig: | import hmac import hashlib import subprocess import os import json from oauthenticator.mediawiki import MWOAuthenticator from tornado import gen from tornado.escape import url_escape from tornado.httpclient import AsyncHTTPClient class Auth(MWOAuthenticator): enable_auth_state = True def normalize_username(self, username): return username async def refresh_user(self, user, handler=None): client = AsyncHTTPClient() try: response = await client.fetch(f"https://meta.wikimedia.org/w/api.php?action=query&format=json&formatversion=2&meta=globaluserinfo&guiuser={url_escape(user.name)}", user_agent="PAWS-authenticator/0.1 (https://phabricator.wikimedia.org/tag/paws/)" ) locked = bool(json.loads(response.body)['query']['globaluserinfo'].get("locked", False)) if locked: await user.spawner.stop(now=True) return False else: return True except Exception as e: self.log.error(f"Error checking for Wikimedia lock on user {user.name}: {e}") return False # Notebook cookies keep user logged in @gen.coroutine # more information about where this comes from found here: # https://jupyterhub-kubespawner.readthedocs.io/en/latest/spawner.html#kubespawner.KubeSpawner.volumes def pre_spawn_start(self, user, spawner): auth_state = yield user.get_auth_state() identity = auth_state['MEDIAWIKI_USER_IDENTITY'] spawner.environment['ACCESS_KEY'] = auth_state['ACCESS_TOKEN_KEY'] spawner.environment['ACCESS_SECRET'] = auth_state['ACCESS_TOKEN_SECRET'] spawner.environment['CLIENT_ID'] = self.client_id spawner.environment['CLIENT_SECRET'] = self.client_secret spawner.environment['USER'] = identity['username'] # Set rather than use .extend! # Since otherwise the volumes list will grow each time # the spawner stops and starts! homedir = '/data/project/paws/userhomes/{}'.format(identity['sub']) homenfs = '/srv/misc/shared/paws/project/paws/userhomes/{}'.format(identity['sub']) # Create the homedir so docker doesn't do it as root os.makedirs(homedir, mode=0o755, exist_ok=True) if localdev == True: spawner.volumes = [ { 'name': 'home', 'hostPath': { 'path': homenfs } }, { 'name': 'dumps', 'hostPath': { 'path': '/public/dumps' } }, { 'name': 'dumps-src1', 'hostPath': { 'path': '/mnt/nfs/dumps-clouddumps1001.wikimedia.org' } }, { 'name': 'dumps-src2', 'hostPath': { 'path': '/mnt/nfs/dumps-clouddumps1002.wikimedia.org' } } ] else: spawner.volumes = [ { 'name': 'home', 'nfs': { 'server': nfs_home, 'path': homenfs } }, { 'name': 'dumps', 'nfs': { 'server': dumps_src1, 'path': '/' } }, { 'name': 'dumps-src1', 'nfs': { 'server': dumps_src1, 'path': '/' } }, { 'name': 'dumps-src2', 'nfs': { 'server': dumps_src2, 'path': '/' } } ] spawner.volume_mounts = [ { 'name': 'home', 'mountPath': '/home/paws' }, { 'name': 'dumps', 'mountPath': '/public/dumps/public', 'readOnly': True }, { 'name': 'dumps-src1', 'mountPath': '/mnt/nfs/dumps-clouddumps1001.wikimedia.org', 'readOnly': True }, { 'name': 'dumps-src2', 'mountPath': '/mnt/nfs/dumps-clouddumps1002.wikimedia.org', 'readOnly': True }, ] c.JupyterHub.authenticator_class = Auth c.JupyterHub.authenticate_prometheus = False c.JupyterHub.logo_file = '/srv/jupyterhub/PAWS.svg' c.JupyterHub.template_vars = { 'announcement': ('' 'Welcome to PAWS. ' 'Please ' ' report any issues on Phabricator, you can also give feedback here' '') } extraEnv: USER: tools.paws JUPYTERHUB_CRYPT_KEY: "4849a4d92a49cdf9a80b49486293e29966c4f02daefa0f5597cf14546bab09f8" MYSQL_HMAC_KEY: "9a33d49db4bb823e87187a11e4f6296bee41bc35c41dc195634dff440c1870f0" cookieSecret: 827902ad187337f83adc565dadfb4c095ce1962442aae043ac78948f9b216a8f podSecurityContext: fsGroup: 52771 image: name: quay.io/wikimedia-paws-prod/paws-hub tag: pr-236 # paws-hub tag managed by github actions containerSecurityContext: runAsUser: 52771 resources: requests: memory: "200Mi" cpu: .2 ingress: enabled: true hosts: - hub.paws.local ingressClassName: "nginx" # We are not on an autoscaling cluster, so we don't want this scheduling: userScheduler: enabled: false userPlaceholder: containerSecurityContext: runAsUser: 52771 runAsGroup: 52771 singleuser: cmd: - jupyterhub-singleuser - --LabApp.collaborative=true fsGid: 52771 image: name: quay.io/wikimedia-paws-prod/singleuser - tag: pr-257 # singleuser tag managed by github actions + tag: pr-250 # singleuser tag managed by github actions pullPolicy: Always memory: guarantee: 1G limit: 3G cpu: guarantee: .15 limit: 1 storage: type: none uid: 52771 # This must be false or this whole thing cannot work with restrictive PSP cloudMetadata: blockWithIptables: false extraEnv: HUB_DOMAIN: "hub.paws.local" # Check jupyterhub.ingress.hosts REFINE_DOMAIN: "*" # Check jupyterhub.ingress.hosts networkPolicy: egressAllowRules: privateIPs: true # needed for access to replicas # mysql configures the wiki replica backend variables mysql: domain: "svc.cluster.local" username: s52771 password: "iAmNotSecret0" minesweeper: enabled: false # most local-dev testers won't have the key to configs image: name: quay.io/wikimedia-paws-prod/minesweeper tag: pr-234 # minesweeper tag managed by github actions template: "{{ .Values.minesweeper.image.name }}:{{ .Values.minesweeper.image.tag }}" # If not deployed for prod use, we use the some hacks for testing localdev: enabled: true image: name: quay.io/wikimedia-paws-prod/jobber tag: pr-155 # jobber tag managed by github actions # mediawikiHacks.image.template safely defines image:tag name in yaml template: "{{ .Values.localdev.image.name}}:{{.Values.localdev.image.tag }}"