From cd1b5dcb0e869b4f0240d094a489274ddc0a2d0f Mon Sep 17 00:00:00 2001
From: Marco Clemencic <marco.clemencic@cern.ch>
Date: Fri, 26 Jun 2020 10:51:08 +0200
Subject: [PATCH] Never trust output to be UTF-8, always replace undecodable bytes

---
 python/LbNightlyTools/CheckoutMethods.py  | 34 ++++++++++++++---------
 python/LbNightlyTools/Configuration.py    |  2 +-
 python/LbNightlyTools/HTMLUtils.py        |  2 +-
 python/LbNightlyTools/Scripts/Checkout.py |  2 +-
 python/LbNightlyTools/Scripts/Install.py  |  7 +++--
 python/LbNightlyTools/Scripts/Release.py  |  4 +--
 python/LbNightlyTools/Utils.py            |  3 +-
 7 files changed, 32 insertions(+), 22 deletions(-)

diff --git a/python/LbNightlyTools/CheckoutMethods.py b/python/LbNightlyTools/CheckoutMethods.py
index 2231a038..3b743a9d 100644
--- a/python/LbNightlyTools/CheckoutMethods.py
+++ b/python/LbNightlyTools/CheckoutMethods.py
@@ -220,7 +220,8 @@ class GitRepository(object):
             proc = Popen(['git', 'config', '--get-all', 'remote.origin.fetch'],
                          cwd=self.path,
                          stdout=PIPE)
-            if 'origin/merge-requests' not in proc.communicate()[0].decode():
+            if 'origin/merge-requests' not in proc.communicate()[0].decode(
+                    'utf-8', errors='replace'):
                 # it must be configured
                 __log__.getChild('git').debug(
                     'getting merge-requests branches')
@@ -240,7 +241,8 @@ class GitRepository(object):
         name -> url.
         '''
         proc = Popen(['git', 'remote', '-v'], cwd=self.path, stdout=PIPE)
-        lines = proc.communicate()[0].decode().splitlines()
+        lines = proc.communicate()[0].decode(
+            'utf-8', errors='replace').splitlines()
         pattern = re.compile(r'(\S+)\s+(\S+)\s+\(fetch\)$')
         return dict(
             m.groups() for m in filter(None, map(pattern.match, lines)))
@@ -252,15 +254,16 @@ class GitRepository(object):
         '''
         proc = Popen(['git', 'branch', '-a'], cwd=self.path, stdout=PIPE)
         return set(branch[2:].rstrip()
-                   for branch in proc.communicate()[0].decode().splitlines())
+                   for branch in proc.communicate()[0].decode(
+                       'utf-8', errors='replace').splitlines())
 
     def tags(self):
         '''
         Return a list of all tags known by the repository.
         '''
         proc = Popen(['git', 'tag'], cwd=self.path, stdout=PIPE)
-        return set(
-            tag.strip() for tag in proc.communicate()[0].decode().splitlines())
+        return set(tag.strip() for tag in proc.communicate()[0].decode(
+            'utf-8', errors='replace').splitlines())
 
     def add_remote(self, name, url, retry=True):
         '''
@@ -337,10 +340,11 @@ class GitRepository(object):
                 __log__.warning(str(err))
             return [
                 os.path.join(self.path,
-                             l.split()[1]) for l in
-                Popen(['git', 'submodule', 'status', '--recursive'],
-                      cwd=self.path,
-                      stdout=PIPE).communicate()[0].decode().splitlines()
+                             l.split()[1])
+                for l in Popen(['git', 'submodule', 'status', '--recursive'],
+                               cwd=self.path,
+                               stdout=PIPE).communicate()[0].decode(
+                                   'utf-8', errors='replace').splitlines()
             ]
         return []
 
@@ -407,7 +411,8 @@ class GitRepository(object):
         cmd = ['git', 'rev-parse']
         cmd.extend(args)
         return Popen(
-            cmd, cwd=self.path, stdout=PIPE).communicate()[0].decode().strip()
+            cmd, cwd=self.path, stdout=PIPE).communicate()[0].decode(
+                'utf-8', errors='replace').strip()
 
     def show_branch(self, *args):
         '''
@@ -529,7 +534,8 @@ class GitLabMergeRequestHandler(GitRepository):
         proc = Popen(['git', 'config', '--get-all', 'remote.origin.fetch'],
                      cwd=self.path,
                      stdout=PIPE)
-        if 'origin/merge-requests' not in proc.communicate()[0].decode():
+        if 'origin/merge-requests' not in proc.communicate()[0].decode(
+                'utf-8', errors='replace'):
             # it must be configured
             __log__.getChild('git').debug('getting merge-requests branches')
             fetch = ('+refs/merge-requests/*/head:'
@@ -820,12 +826,14 @@ def git(proj, url=None, commit=None, export=False, merges=None):
                 # (test requested commit first)
                 log.debug('looking for an equivalent commit')
                 for ref in eval('[{!r},'.format(commit_requested) +
-                                proc.communicate()[0].decode() + ']'):
+                                proc.communicate()[0].decode(
+                                    'utf-8', errors='replace') + ']'):
                     if repo.rev_parse(ref + ':') == tree:
                         proc = Popen(['git', 'rev-list', '--max-count=1', ref],
                                      cwd=repo.path,
                                      stdout=PIPE)
-                        commit = proc.communicate()[0].decode().strip()
+                        commit = proc.communicate()[0].decode(
+                            'utf-8', errors='replace').strip()
                         log.debug('reusing commit %s (%s)', commit, ref)
                         break  # we found a commit with the same content
                 else:  # we could not find it, so we stick to HEAD
diff --git a/python/LbNightlyTools/Configuration.py b/python/LbNightlyTools/Configuration.py
index be70e70b..29a9f738 100644
--- a/python/LbNightlyTools/Configuration.py
+++ b/python/LbNightlyTools/Configuration.py
@@ -143,7 +143,7 @@ class UTFStringIO(StringIO):
 
     def write(self, s):
         if isinstance(s, bytes):
-            s = s.decode('utf-8')
+            s = s.decode('utf-8', errors='replace')
         return StringIO.write(self, s)
 
 
diff --git a/python/LbNightlyTools/HTMLUtils.py b/python/LbNightlyTools/HTMLUtils.py
index b0d279fc..6fc8ad2d 100644
--- a/python/LbNightlyTools/HTMLUtils.py
+++ b/python/LbNightlyTools/HTMLUtils.py
@@ -464,7 +464,7 @@ class AddGitlabLinks(object):
                 if title:
                     title = cgi.escape(title, quote=True)
                     if not isinstance(title, unicode):
-                        title = title.decode('utf-8', 'replace')
+                        title = title.decode('utf-8', errors='replace')
                     return (
                         u'<a href="https://gitlab.cern.ch/{0}/'
                         u'merge_requests/{1}" data-toggle="tooltip" '
diff --git a/python/LbNightlyTools/Scripts/Checkout.py b/python/LbNightlyTools/Scripts/Checkout.py
index af2075b9..1b5fc804 100644
--- a/python/LbNightlyTools/Scripts/Checkout.py
+++ b/python/LbNightlyTools/Scripts/Checkout.py
@@ -274,7 +274,7 @@ class Script(BaseScript):
                     co_log.write(conv.head(title=os.path.basename(co_logfile)))
                     log = cgi.escape(project.checkout_log, quote=True)
                     if not isinstance(log, unicode):
-                        log = log.decode('utf-8', 'replace')
+                        log = log.decode('utf-8', errors='replace')
                     co_log.write(conv.process(log))
                     co_log.write(conv.tail())
 
diff --git a/python/LbNightlyTools/Scripts/Install.py b/python/LbNightlyTools/Scripts/Install.py
index c4f07489..a8f7d060 100644
--- a/python/LbNightlyTools/Scripts/Install.py
+++ b/python/LbNightlyTools/Scripts/Install.py
@@ -90,7 +90,7 @@ def _list_http(url):
                 self._text = ''
 
     parser = ListHTMLParser()
-    parser.feed(urlopen(url).read().decode())
+    parser.feed(urlopen(url).read().decode('utf-8', errors='replace'))
     return parser.data
 
 
@@ -100,7 +100,7 @@ def _list_ssh(url):
     '''
     host, path = url.split(':', 1)
     proc = Popen(['ssh', host, 'ls -a1 %r' % path], stdout=PIPE)
-    return proc.communicate()[0].decode().splitlines()
+    return proc.communicate()[0].decode('utf-8', errors='replace').splitlines()
 
 
 def _url_protocol(url):
@@ -620,7 +620,8 @@ class Script(PlainScript):
                 command = ['patch', '-p1', '-f', '-i', 'slot.patch']
                 proc = Popen(command, cwd=dest, stdout=PIPE, stderr=STDOUT)
                 out, _ = proc.communicate()
-                self.log.debug('output of %s:\n%s', command, out.decode())
+                self.log.debug('output of %s:\n%s', command,
+                               out.decode('utf-8', errors='replace'))
 
             if index_installed:
                 fixGlimpseIndexes(
diff --git a/python/LbNightlyTools/Scripts/Release.py b/python/LbNightlyTools/Scripts/Release.py
index 320f6940..185fb050 100644
--- a/python/LbNightlyTools/Scripts/Release.py
+++ b/python/LbNightlyTools/Scripts/Release.py
@@ -471,7 +471,7 @@ def createManifestFile(project, version, platform, build_dir):
                  stdout=PIPE,
                  stderr=PIPE)
     out, _err = proc.communicate()
-    out = out.decode()
+    out = out.decode('utf-8', errors='replace')
 
     # no check because we must have a dependency on LCGCMT
     match = re.search(r'LCGCMT_([^ ]+)', out)
@@ -502,7 +502,7 @@ def createManifestFile(project, version, platform, build_dir):
                      stdout=PIPE,
                      stderr=PIPE)
         out, _err = proc.communicate()
-        out = out.decode().splitlines()
+        out = out.decode('utf-8', errors='replace').splitlines()
         data_pkgs = [
             x.replace(' ', ',').split(',')[1:4:2] for x in out
             if re.search(r'DBASE|PARAM', x)
diff --git a/python/LbNightlyTools/Utils.py b/python/LbNightlyTools/Utils.py
index 22d3b6a4..95f43e88 100644
--- a/python/LbNightlyTools/Utils.py
+++ b/python/LbNightlyTools/Utils.py
@@ -234,7 +234,8 @@ def log_call(*args, **kwargs):
                 spilled_output[fd] = b''
                 for line in data.splitlines(True):
                     if line.endswith(b'\n'):
-                        log(log_level, line.decode().rstrip())
+                        log(log_level,
+                            line.decode('utf-8', errors='replace').rstrip())
                     else:
                         spilled_output[fd] += line
             else:
-- 
GitLab