From cd1b5dcb0e869b4f0240d094a489274ddc0a2d0f Mon Sep 17 00:00:00 2001 From: Marco Clemencic <marco.clemencic@cern.ch> Date: Fri, 26 Jun 2020 10:51:08 +0200 Subject: [PATCH] Never trust output to be UTF-8, always ignore errors --- python/LbNightlyTools/CheckoutMethods.py | 34 ++++++++++++++--------- python/LbNightlyTools/Configuration.py | 2 +- python/LbNightlyTools/HTMLUtils.py | 2 +- python/LbNightlyTools/Scripts/Checkout.py | 2 +- python/LbNightlyTools/Scripts/Install.py | 7 +++-- python/LbNightlyTools/Scripts/Release.py | 4 +-- python/LbNightlyTools/Utils.py | 3 +- 7 files changed, 32 insertions(+), 22 deletions(-) diff --git a/python/LbNightlyTools/CheckoutMethods.py b/python/LbNightlyTools/CheckoutMethods.py index 2231a038..3b743a9d 100644 --- a/python/LbNightlyTools/CheckoutMethods.py +++ b/python/LbNightlyTools/CheckoutMethods.py @@ -220,7 +220,8 @@ class GitRepository(object): proc = Popen(['git', 'config', '--get-all', 'remote.origin.fetch'], cwd=self.path, stdout=PIPE) - if 'origin/merge-requests' not in proc.communicate()[0].decode(): + if 'origin/merge-requests' not in proc.communicate()[0].decode( + 'utf-8', errors='replace'): # it must be configured __log__.getChild('git').debug( 'getting merge-requests branches') @@ -240,7 +241,8 @@ class GitRepository(object): name -> url. ''' proc = Popen(['git', 'remote', '-v'], cwd=self.path, stdout=PIPE) - lines = proc.communicate()[0].decode().splitlines() + lines = proc.communicate()[0].decode( + 'utf-8', errors='replace').splitlines() pattern = re.compile(r'(\S+)\s+(\S+)\s+\(fetch\)$') return dict( m.groups() for m in filter(None, map(pattern.match, lines))) @@ -252,15 +254,16 @@ class GitRepository(object): ''' proc = Popen(['git', 'branch', '-a'], cwd=self.path, stdout=PIPE) return set(branch[2:].rstrip() - for branch in proc.communicate()[0].decode().splitlines()) + for branch in proc.communicate()[0].decode( + 'utf-8', errors='replace').splitlines()) def tags(self): ''' Return a list of all tags known by the repository. ''' proc = Popen(['git', 'tag'], cwd=self.path, stdout=PIPE) - return set( - tag.strip() for tag in proc.communicate()[0].decode().splitlines()) + return set(tag.strip() for tag in proc.communicate()[0].decode( + 'utf-8', errors='replace').splitlines()) def add_remote(self, name, url, retry=True): ''' @@ -337,10 +340,11 @@ class GitRepository(object): __log__.warning(str(err)) return [ os.path.join(self.path, - l.split()[1]) for l in - Popen(['git', 'submodule', 'status', '--recursive'], - cwd=self.path, - stdout=PIPE).communicate()[0].decode().splitlines() + l.split()[1]) + for l in Popen(['git', 'submodule', 'status', '--recursive'], + cwd=self.path, + stdout=PIPE).communicate()[0].decode( + 'utf-8', errors='replace').splitlines() ] return [] @@ -407,7 +411,8 @@ class GitRepository(object): cmd = ['git', 'rev-parse'] cmd.extend(args) return Popen( - cmd, cwd=self.path, stdout=PIPE).communicate()[0].decode().strip() + cmd, cwd=self.path, stdout=PIPE).communicate()[0].decode( + 'utf-8', errors='replace').strip() def show_branch(self, *args): ''' @@ -529,7 +534,8 @@ class GitLabMergeRequestHandler(GitRepository): proc = Popen(['git', 'config', '--get-all', 'remote.origin.fetch'], cwd=self.path, stdout=PIPE) - if 'origin/merge-requests' not in proc.communicate()[0].decode(): + if 'origin/merge-requests' not in proc.communicate()[0].decode( + 'utf-8', errors='replace'): # it must be configured __log__.getChild('git').debug('getting merge-requests branches') fetch = ('+refs/merge-requests/*/head:' @@ -820,12 +826,14 @@ def git(proj, url=None, commit=None, export=False, merges=None): # (test requested commit first) log.debug('looking for an equivalent commit') for ref in eval('[{!r},'.format(commit_requested) + - proc.communicate()[0].decode() + ']'): + proc.communicate()[0].decode( + 'utf-8', errors='replace') + ']'): if repo.rev_parse(ref + ':') == tree: proc = Popen(['git', 'rev-list', '--max-count=1', ref], cwd=repo.path, stdout=PIPE) - commit = proc.communicate()[0].decode().strip() + commit = proc.communicate()[0].decode( + 'utf-8', errors='replace').strip() log.debug('reusing commit %s (%s)', commit, ref) break # we found a commit with the same content else: # we could not find it, so we stick to HEAD diff --git a/python/LbNightlyTools/Configuration.py b/python/LbNightlyTools/Configuration.py index be70e70b..29a9f738 100644 --- a/python/LbNightlyTools/Configuration.py +++ b/python/LbNightlyTools/Configuration.py @@ -143,7 +143,7 @@ class UTFStringIO(StringIO): def write(self, s): if isinstance(s, bytes): - s = s.decode('utf-8') + s = s.decode('utf-8', errors='replace') return StringIO.write(self, s) diff --git a/python/LbNightlyTools/HTMLUtils.py b/python/LbNightlyTools/HTMLUtils.py index b0d279fc..6fc8ad2d 100644 --- a/python/LbNightlyTools/HTMLUtils.py +++ b/python/LbNightlyTools/HTMLUtils.py @@ -464,7 +464,7 @@ class AddGitlabLinks(object): if title: title = cgi.escape(title, quote=True) if not isinstance(title, unicode): - title = title.decode('utf-8', 'replace') + title = title.decode('utf-8', errors='replace') return ( u'<a href="https://gitlab.cern.ch/{0}/' u'merge_requests/{1}" data-toggle="tooltip" ' diff --git a/python/LbNightlyTools/Scripts/Checkout.py b/python/LbNightlyTools/Scripts/Checkout.py index af2075b9..1b5fc804 100644 --- a/python/LbNightlyTools/Scripts/Checkout.py +++ b/python/LbNightlyTools/Scripts/Checkout.py @@ -274,7 +274,7 @@ class Script(BaseScript): co_log.write(conv.head(title=os.path.basename(co_logfile))) log = cgi.escape(project.checkout_log, quote=True) if not isinstance(log, unicode): - log = log.decode('utf-8', 'replace') + log = log.decode('utf-8', errors='replace') co_log.write(conv.process(log)) co_log.write(conv.tail()) diff --git a/python/LbNightlyTools/Scripts/Install.py b/python/LbNightlyTools/Scripts/Install.py index c4f07489..a8f7d060 100644 --- a/python/LbNightlyTools/Scripts/Install.py +++ b/python/LbNightlyTools/Scripts/Install.py @@ -90,7 +90,7 @@ def _list_http(url): self._text = '' parser = ListHTMLParser() - parser.feed(urlopen(url).read().decode()) + parser.feed(urlopen(url).read().decode('utf-8', errors='replace')) return parser.data @@ -100,7 +100,7 @@ def _list_ssh(url): ''' host, path = url.split(':', 1) proc = Popen(['ssh', host, 'ls -a1 %r' % path], stdout=PIPE) - return proc.communicate()[0].decode().splitlines() + return proc.communicate()[0].decode('utf-8', errors='replace').splitlines() def _url_protocol(url): @@ -620,7 +620,8 @@ class Script(PlainScript): command = ['patch', '-p1', '-f', '-i', 'slot.patch'] proc = Popen(command, cwd=dest, stdout=PIPE, stderr=STDOUT) out, _ = proc.communicate() - self.log.debug('output of %s:\n%s', command, out.decode()) + self.log.debug('output of %s:\n%s', command, + out.decode('utf-8', errors='replace')) if index_installed: fixGlimpseIndexes( diff --git a/python/LbNightlyTools/Scripts/Release.py b/python/LbNightlyTools/Scripts/Release.py index 320f6940..185fb050 100644 --- a/python/LbNightlyTools/Scripts/Release.py +++ b/python/LbNightlyTools/Scripts/Release.py @@ -471,7 +471,7 @@ def createManifestFile(project, version, platform, build_dir): stdout=PIPE, stderr=PIPE) out, _err = proc.communicate() - out = out.decode() + out = out.decode('utf-8', errors='replace') # no check because we must have a dependency on LCGCMT match = re.search(r'LCGCMT_([^ ]+)', out) @@ -502,7 +502,7 @@ def createManifestFile(project, version, platform, build_dir): stdout=PIPE, stderr=PIPE) out, _err = proc.communicate() - out = out.decode().splitlines() + out = out.decode('utf-8', errors='replace').splitlines() data_pkgs = [ x.replace(' ', ',').split(',')[1:4:2] for x in out if re.search(r'DBASE|PARAM', x) diff --git a/python/LbNightlyTools/Utils.py b/python/LbNightlyTools/Utils.py index 22d3b6a4..95f43e88 100644 --- a/python/LbNightlyTools/Utils.py +++ b/python/LbNightlyTools/Utils.py @@ -234,7 +234,8 @@ def log_call(*args, **kwargs): spilled_output[fd] = b'' for line in data.splitlines(True): if line.endswith(b'\n'): - log(log_level, line.decode().rstrip()) + log(log_level, + line.decode('utf-8', errors='replace').rstrip()) else: spilled_output[fd] += line else: -- GitLab