refactor: Remove unused symbolstore.py

2026-03-22 14:55:36 +08:00 · 2024-03-09 22:02:40 +01:00
parent 482407e4b3
commit a9e4f3073f
2 changed files with 0 additions and 544 deletions
--- a/scripts/symbolstore.py
+++ b/scripts/symbolstore.py
@@ -1,543 +0,0 @@
-#!/bin/env python
-# This Source Code Form is subject to the terms of the Mozilla Public
-# License, v. 2.0. If a copy of the MPL was not distributed with this
-# file, You can obtain one at http://mozilla.org/MPL/2.0/.
-#
-# This is a simplified version of Mozilla's symbolstore.py
-# 
-# Usage: create_symbolstore.py <params> <dump_syms path> <symbol store path>
-#                                <debug info files or dirs>
-#   Runs dump_syms on each debug info file specified on the command line,
-#   then places the resulting symbol file in the proper directory
-#   structure in the symbol store path.  Accepts multiple files
-#   on the command line, so can be called as part of a pipe using
-#   find <dir> | xargs symbolstore.pl <dump_syms> <storepath>
-#   But really, you might just want to pass it <dir>.
-#
-#   Parameters accepted:
-#     -c           : Copy debug info files to the same directory structure
-#                    as sym files. On Windows, this will also copy
-#                    binaries into the symbol store.
-#     -a "<archs>" : Run dump_syms -a <arch> for each space separated
-#                    cpu architecture in <archs> (only on OS X)
-
-import sys
-import os
-import re
-import shutil
-import fnmatch
-import subprocess
-import time
-import ctypes
-import concurrent.futures
-import multiprocessing
-import platform
-import tarfile
-
-from optparse import OptionParser
-
-# Utility functions
-
-# A cache of files for which VCS info has already been determined. Used to
-# prevent extra filesystem activity or process launching.
-vcsFileInfoCache = {}
-
-
-def get_platform_specific_dumper(**kwargs):
-    """This function simply returns a instance of a subclass of Dumper
-    that is appropriate for the current platform."""
-    return {'Windows': DumperWin32,
-            'Linux': DumperLinux,
-            'Darwin': DumperMac}[platform.system()](**kwargs)
-
-
-def start_job(dumper, lock, src_dir_repo_info, func_name, args):
-    # Windows worker processes won't have run GlobalInit,
-    # and due to a lack of fork(), won't inherit the class
-    # variables from the parent, so set them here.
-    Dumper.lock = lock
-    Dumper.src_dir_repo_info = src_dir_repo_info
-    return getattr(dumper, func_name)(*args)
-
-
-class JobPool(object):
-    jobs = {}
-    executor = None
-
-    @classmethod
-    def init(cls, executor):
-        cls.executor = executor
-
-    @classmethod
-    def shutdown(cls):
-        cls.executor.shutdown()
-
-    @classmethod
-    def submit(cls, args, callback):
-        cls.jobs[cls.executor.submit(start_job, *args)] = callback
-
-    @classmethod
-    def as_completed(cls):
-        """Like concurrent.futures.as_completed, but allows adding new futures
-        between generator steps. Iteration will end when the generator has
-        yielded all completed futures and JobQueue.jobs is empty.
-        Yields (future, callback) pairs.
-        """
-        while cls.jobs:
-            completed, _ = concurrent.futures.wait(cls.jobs.keys(), return_when=concurrent.futures.FIRST_COMPLETED)
-            for f in completed:
-                callback = cls.jobs[f]
-                del cls.jobs[f]
-                yield f, callback
-
-
-class Dumper:
-    """This class can dump symbols from a file with debug info, and
-    store the output in a flat directory structure.
-    Requires a path to a dump_syms binary--|dump_syms| and a directory
-    to store symbols in--|symbol_path|.
-
-    You don't want to use this directly if you intend to process files.
-    Instead, call GetPlatformSpecificDumper to get an instance of a
-    subclass.
-
-    Processing is performed asynchronously via worker processes; in
-    order to wait for processing to finish and cleanup correctly, you
-    must call Finish after all ProcessFiles calls have been made.
-    You must also call Dumper.GlobalInit before creating or using any
-    instances."""
-
-    srcdirRepoInfo = None
-    lock = None
-
-    def __init__(self, dump_syms, symbol_path,
-                 archs=None,
-                 srcdirs=None,
-                 exclude=None):
-        # popen likes absolute paths, at least on windows
-        if srcdirs is None:
-            srcdirs = []
-        if exclude is None:
-            exclude = []
-        self.dump_syms = os.path.abspath(dump_syms)
-        self.symbol_path = symbol_path
-        if archs is None:
-            # makes the loop logic simpler
-            self.archs = ['']
-        else:
-            self.archs = ['-a %s' % a for a in archs.split()]
-        self.srcdirs = [os.path.normpath(a) for a in srcdirs]
-        self.exclude = exclude[:]
-
-        # book-keeping to keep track of the cleanup work per file tuple
-        self.files_record = {}
-
-    @classmethod
-    def global_init(cls, executor=concurrent.futures.ProcessPoolExecutor):
-        """Initialize the class globals for the multiprocessing setup; must
-        be called before any Dumper instances are created and used. Test cases
-        may pass in a different executor to use, usually
-        concurrent.futures.ThreadPoolExecutor."""
-        num_cpus = multiprocessing.cpu_count()
-        if num_cpus is None:
-            # assume a dual core machine if we can't find out for some reason
-            # probably better on single core anyway due to I/O constraints
-            num_cpus = 2
-
-        # have to create any locks etc before the pool
-        manager = multiprocessing.Manager()
-        cls.lock = manager.RLock()
-        cls.srcdirRepoInfo = manager.dict()
-        JobPool.init(executor(max_workers=num_cpus))
-
-    @staticmethod
-    def output(destination, output_str):
-        """Writes |output_str| to |destination|, holding |lock|;
-        terminates with a newline."""
-        with Dumper.lock:
-            destination.write(output_str + "\n")
-            destination.flush()
-
-    def output_pid(self, dest, output_str):
-        """Debugging output; prepends the pid to the string."""
-        self.output(dest, "%d: %s" % (os.getpid(), output_str))
-
-    # subclasses override this
-    def should_process(self, file_name):
-        return not any(fnmatch.fnmatch(os.path.basename(file_name), exclude) for exclude in self.exclude)
-
-    # and can override this
-    def should_skip_dir(self, directory):
-        return False
-
-    @staticmethod
-    def run_file_command(file_name):
-        """Utility function, returns the output of file(1)"""
-        try:
-            # we use -L to read the targets of symlinks,
-            # and -b to print just the content, not the file_name
-            return os.popen("file -Lb " + file_name).read()
-        except OSError:
-            return ""
-
-    # This is a no-op except on Win32
-    def fix_filename_case(self, file_name):
-        return file_name
-
-    def finish(self, stop_pool=True):
-        """Process all pending jobs and any jobs their callbacks submit.
-        By default, will shutdown the executor, but for testcases that
-        need multiple runs, pass stop_pool = False."""
-        for job, callback in JobPool.as_completed():
-            try:
-                res = job.result()
-            except Exception as e:
-                self.output(sys.stderr, 'Job raised exception: %s' % e)
-                continue
-            callback(res)
-        if stop_pool:
-            JobPool.shutdown()
-
-    def pack(self, tar_path=None):
-        if tar_path is None:
-            tar_path = os.path.normpath(os.path.join(self.symbol_path, ".."))
-            tar_path = os.path.join(tar_path, 'symbols.tar.gz')
-        tar = tarfile.open(tar_path, "w:gz")
-        for dirname, subdirs, files in os.walk(self.symbol_path):
-            for filename in files:
-                tar.add(os.path.join(dirname, filename), filename)
-        tar.close()
-
-    def process(self, *args):
-        """Process files recursively in args."""
-        # We collect all files to process first then sort by size to schedule
-        # larger files first because larger files tend to take longer and we
-        # don't like long pole stragglers.
-        files = set()
-        for arg in args:
-            for f in self.get_files_to_process(arg):
-                files.add(f)
-
-        for f in sorted(files, key=os.path.getsize, reverse=True):
-            self.process_files((f,))
-
-    def get_files_to_process(self, file_or_dir):
-        """Generate the files to process from an input."""
-        if os.path.isdir(file_or_dir) and not self.should_skip_dir(file_or_dir):
-            for f in self.get_files_to_process_in_dir(file_or_dir):
-                yield f
-        elif os.path.isfile(file_or_dir):
-            yield file_or_dir
-
-    def get_files_to_process_in_dir(self, path):
-        """Generate the files to process in a directory.
-
-        Valid files are are determined by calling ShouldProcess.
-        """
-        for root, dirs, files in os.walk(path):
-            for d in dirs[:]:
-                if self.should_skip_dir(d):
-                    dirs.remove(d)
-            for f in files:
-                full_path = os.path.join(root, f)
-                if self.should_process(full_path):
-                    yield full_path
-
-    def submit_job(self, func_name, args, callback):
-        """Submits a job to the pool of workers"""
-        JobPool.submit((self, Dumper.lock, Dumper.srcdirRepoInfo, func_name, args), callback)
-
-    def process_files_finished(self, res):
-        """Callback from multiprocessing when process_files_work finishes;
-        run the cleanup work, if any"""
-        # only run the cleanup function once per tuple of files
-        self.files_record[res['files']] += 1
-        if self.files_record[res['files']] == len(self.archs):
-            del self.files_record[res['files']]
-            if res['after']:
-                res['after'](res['status'], res['after_arg'])
-
-    def process_files(self, files, after=None, after_arg=None):
-        """Dump symbols from these files into a symbol file, stored
-        |symbol_path|; processing is performed asynchronously, and Finish must be
-        called to wait for it complete and cleanup.
-        All files after the first are fallbacks in case the first file does not process
-        successfully; if it does, no other files will be touched."""
-        self.output_pid(sys.stderr, "Submitting jobs for files: %s" % str(files))
-
-        for arch_num, arch in enumerate(self.archs):
-            self.files_record[files] = 0  # record that we submitted jobs for this tuple of files
-            self.submit_job('process_files_work', args=(files, arch, after, after_arg),
-                            callback=self.process_files_finished)
-
-    def dump_syms_cmdline(self, file_name, arch, files):
-        """Get the commandline used to invoke dump_syms."""
-        # The Mac dumper overrides this.
-        return [self.dump_syms, file_name]
-
-    def process_files_work(self, file_names, arch, after, after_arg):
-        t_start = time.time()
-        self.output_pid(sys.stderr, "Worker processing files: %s" % (file_names,))
-
-        # our result is a status, a cleanup function, an argument to that function, and the tuple of files
-        # we were called on
-        result = {'status': False, 'after': after, 'after_arg': after_arg, 'files': file_names}
-
-        source_file_stream = ''
-        for file_name in file_names:
-            # files is a tuple of files, containing fallbacks in case the first file doesn't process successfully
-            try:
-                cmd = self.dump_syms_cmdline(file_name, arch, file_names)
-                self.output_pid(sys.stderr, ' '.join(cmd))
-                proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
-                                        stderr=open(os.devnull, 'wb'))
-                module_line = proc.stdout.readline().decode("utf-8")
-                if module_line.startswith("MODULE"):
-                    # MODULE os cpu guid debug_file
-                    (guid, debug_file) = (module_line.split())[3:5]
-                    # strip off .pdb extensions, and append .sym
-                    sym_file = re.sub("\.pdb$", "", debug_file) + ".sym"
-                    full_path = os.path.normpath(os.path.join(self.symbol_path,
-                                                              sym_file))
-                    try:
-                        os.makedirs(os.path.dirname(full_path))
-                    except OSError:  # already exists
-                        pass
-                    f = open(full_path, "w")
-                    f.write(module_line)
-                    # now process the rest of the output
-                    for line_as_bytes in proc.stdout:
-                        line = line_as_bytes.decode("utf-8")
-                        if line.startswith("FILE"):
-                            # FILE index filename
-                            (x, index, filename) = line.rstrip().split(None, 2)
-                            filename = os.path.normpath(self.fix_filename_case(filename))
-                            f.write("FILE %s %s\n" % (index, filename))
-                        elif line.startswith("INFO CODE_ID "):
-                            # INFO CODE_ID code_id code_file
-                            # This gives some info we can use to
-                            # store binaries in the symbol store.
-                            f.write(line)
-                        else:
-                            # pass through all other lines unchanged
-                            f.write(line)
-                            # we want to return true only if at least one line is not a MODULE or FILE line
-                            result['status'] = True
-                    f.close()
-                    proc.wait()
-                    # we output relative paths so callers can get a list of what
-                    # was generated
-                    self.output(sys.stdout, sym_file)
-            except StopIteration:
-                pass
-            except Exception as e:
-                self.output(sys.stderr, "Unexpected error: %s" % (str(e),))
-                raise
-            if result['status']:
-                # we only need 1 file to work
-                break
-
-        elapsed = time.time() - t_start
-        self.output_pid(sys.stderr, 'Worker finished processing %s in %.2fs' %
-                        (file_names, elapsed))
-        return result
-
-
-# Platform-specific subclasses.  For the most part, these just have
-# logic to determine what files to extract symbols from.
-
-
-class DumperWin32(Dumper):
-    fixedFilenameCaseCache = {}
-
-    def should_process(self, file_name):
-        """This function will allow processing of pdb files that have dll
-        or exe files with the same base name next to them."""
-        if not Dumper.should_process(self, file_name):
-            return False
-        if file_name.endswith(".pdb"):
-            (path, ext) = os.path.splitext(file_name)
-            if os.path.isfile(path + ".exe") or os.path.isfile(path + ".dll"):
-                return True
-        return False
-
-    def fix_filename_case(self, file_name):
-        """Recent versions of Visual C++ put filenames into
-        PDB files as all lowercase.  If the file_name exists
-        on the local filesystem, fix it."""
-
-        # Use a cached version if we have one.
-        if file_name in self.fixedFilenameCaseCache:
-            return self.fixedFilenameCaseCache[file_name]
-
-        result = file_name
-
-        ctypes.windll.kernel32.SetErrorMode(ctypes.c_uint(1))
-        if not isinstance(file_name, str):
-            file_name = file_name.decode(sys.getfilesystemencoding())
-        handle = ctypes.windll.kernel32.CreateFileW(file_name,
-                                                    # GENERIC_READ
-                                                    0x80000000,
-                                                    # FILE_SHARE_READ
-                                                    1,
-                                                    None,
-                                                    # OPEN_EXISTING
-                                                    3,
-                                                    0,
-                                                    None)
-        if handle != -1:
-            size = ctypes.windll.kernel32.GetFinalPathNameByHandleW(handle,
-                                                                    None,
-                                                                    0,
-                                                                    0)
-            buf = ctypes.create_unicode_buffer(size)
-            if ctypes.windll.kernel32.GetFinalPathNameByHandleW(handle,
-                                                                buf,
-                                                                size,
-                                                                0) > 0:
-                # The return value of GetFinalPathNameByHandleW uses the
-                # '\\?\' prefix.
-                result = buf.value.encode(sys.getfilesystemencoding())[4:]
-            ctypes.windll.kernel32.CloseHandle(handle)
-
-        # Cache the corrected version to avoid future filesystem hits.
-        self.fixedFilenameCaseCache[file_name] = result
-        return result
-
-
-class DumperLinux(Dumper):
-    objcopy = os.environ['OBJCOPY'] if 'OBJCOPY' in os.environ else 'objcopy'
-
-    def should_process(self, file_name):
-        """This function will allow processing of files that are
-        executable, or end with the .so extension, and additionally
-        file(1) reports as being ELF files.  It expects to find the file
-        command in PATH."""
-        if not Dumper.should_process(self, file_name):
-            return False
-        if file_name.endswith(".so") or os.access(file_name, os.X_OK):
-            if not os.path.islink(file_name) and self.run_file_command(file_name).startswith("ELF"):
-                return True
-        return False
-
-
-def after_mac(status, dsymbundle):
-    """Cleanup function to run on Macs after we process the file(s)."""
-    shutil.rmtree(dsymbundle)
-
-
-class DumperMac(Dumper):
-    def should_process(self, file_name):
-        """This function will allow processing of files that are
-        executable, or end with the .dylib extension, and additionally
-        file(1) reports as being Mach-O files.  It expects to find the file
-        command in PATH."""
-        if not Dumper.should_process(self, file_name):
-            return False
-        if file_name.endswith(".dylib") or os.access(file_name, os.X_OK):
-            if not os.path.islink(file_name) and self.run_file_command(file_name).startswith("Mach-O"):
-                return True
-        return False
-
-    def should_skip_dir(self, directory):
-        """We create .dSYM bundles on the fly, but if someone runs
-        buildsymbols twice, we should skip any bundles we created
-        previously, otherwise we'll recurse into them and try to 
-        dump the inner bits again."""
-        if directory.endswith(".dSYM"):
-            return True
-        return False
-
-    def process_files(self, files, after=None, after_arg=None):
-        # also note, files must be len 1 here, since we're the only ones
-        # that ever add more than one file to the list
-        file_name = files[0]
-        self.output_pid(sys.stderr, "Submitting job for Mac pre-processing on file: %s" % file_name)
-        self.submit_job('process_files_work_mac', args=(file_name,), callback=self.process_files_mac_finished)
-
-    def process_files_mac_finished(self, result):
-        if result['status']:
-            # kick off new jobs per-arch with our new list of files
-            Dumper.process_files(self, result['files'], after=after_mac, after_arg=result['files'][0])
-
-    def dump_syms_cmdline(self, file_name, arch, files):
-        """Get the commandline used to invoke dump_syms."""
-        # dump_syms wants the path to the original binary and the .dSYM
-        # in order to dump all the symbols.
-        if len(files) == 2 and file_name == files[0] and file_name.endswith('.dSYM'):
-            # This is the .dSYM bundle.
-            return [self.dump_syms] + arch.split() + ['-g', file_name, files[1]]
-        return Dumper.dump_syms_cmdline(self, file_name, arch, files)
-
-    def process_files_work_mac(self, file_name):
-        """dump_syms on Mac needs to be run on a dSYM bundle produced
-        by dsymutil(1), so run dsymutil here and pass the bundle name
-        down to the superclass method instead."""
-        t_start = time.time()
-        self.output_pid(sys.stderr, "Worker running Mac pre-processing on file: %s" % (file_name,))
-
-        # our return is a status and a tuple of files to dump symbols for
-        # the extra files are fallbacks; as soon as one is dumped successfully, we stop
-        result = {'status': False, 'files': None, 'file_key': file_name}
-        dsymbundle = file_name + ".dSYM"
-        if os.path.exists(dsymbundle):
-            shutil.rmtree(dsymbundle)
-        #dsymutil = buildconfig.substs['DSYMUTIL']
-        dsymutil = 'dsymutil'
-        # dsymutil takes --arch=foo instead of -a foo like everything else
-        try:
-            cmd = ([dsymutil] +
-                   [a.replace('-a ', '--arch=') for a in self.archs if a] +
-                   [file_name])
-            self.output_pid(sys.stderr, ' '.join(cmd))
-            subprocess.check_call(cmd, stdout=open(os.devnull, 'w'))
-        except subprocess.CalledProcessError as e:
-            self.output_pid(sys.stderr, 'Error running dsymutil: %s' % str(e))
-
-        if not os.path.exists(dsymbundle):
-            # dsymutil won't produce a .dSYM for files without symbols
-            self.output_pid(sys.stderr, "No symbols found in file: %s" % (file_name,))
-            result['status'] = False
-            result['files'] = (file_name,)
-            return result
-
-        result['status'] = True
-        result['files'] = (dsymbundle, file_name)
-        elapsed = time.time() - t_start
-        self.output_pid(sys.stderr, 'Worker finished processing %s in %.2fs' %
-                        (file_name, elapsed))
-        return result
-
-
-# Entry point if called as a standalone program
-def main():
-    parser = OptionParser(usage="usage: %prog [options] <dump_syms binary> <symbol store path> <debug info files>")
-    parser.add_option("-a", "--archs",
-                      action="store", dest="archs",
-                      help="Run dump_syms -a <arch> for each space separated cpu architecture in ARCHS (only on OS X)")
-    parser.add_option("-x", "--exclude",
-                      action="append", dest="exclude", default=[], metavar="PATTERN",
-                      help="Skip processing files matching PATTERN.")
-    (options, args) = parser.parse_args()
-
-    if len(args) < 3:
-        parser.error("not enough arguments")
-        exit(1)
-
-    dumper = get_platform_specific_dumper(dump_syms=args[0],
-                                          symbol_path=args[1],
-                                          archs=options.archs,
-                                          exclude=options.exclude)
-
-    dumper.process(*args[2:])
-    dumper.finish()
-    dumper.pack()
-
-
-# run main if run directly
-if __name__ == "__main__":
-    # set up the multiprocessing infrastructure before we start;
-    # note that this needs to be in the __main__ guard, or else Windows will choke
-    Dumper.global_init()
-
-    main()
--- a/scripts/symbolstore.py.license
+++ b/scripts/symbolstore.py.license
@@ -1 +0,0 @@
-SPDX-License-Identifier: MPL-2.0