Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add CAR files support #17

Merged
merged 1 commit into from
May 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions atproto/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from .car import CAR
from .cid import CID
from .nsid import NSID
from .uri import AtUri
Expand All @@ -10,6 +11,7 @@
'Client',
'models',
'NSID',
'CAR',
'CID',
'AtUri',
]
68 changes: 68 additions & 0 deletions atproto/car/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from io import BytesIO
from typing import Dict

import dag_cbor

from .. import leb128
from ..cid import CID

Nodes = Dict[CID, dict]


class CAR:
    """Parsed CAR file: the root taken from the header plus the decoded blocks keyed by CID."""

    # Number of bytes read from every block as its CID before the block payload starts.
    _CID_V1_BYTES_LEN = 36

    def __init__(self, root: str, nodes: Nodes):
        # NOTE(review): `root` is whatever `dag_cbor.decode` yields for the first entry of the
        # header's 'roots' list; confirm that it really is a `str` as annotated.
        self._root, self._nodes = root, nodes

    @property
    def root(self):
        """Root stored in the header of the CAR file."""
        return self._root

    @property
    def nodes(self) -> Nodes:
        """Decoded blocks of the CAR file, keyed by their CID."""
        return self._nodes

    @classmethod
    def from_bytes(cls, data: bytes) -> 'CAR':
        """Decode CAR file.

        The input is read as a length-prefixed header followed by length-prefixed blocks until the
        end of `data`. Every length prefix is an unsigned LEB128 number; every block starts with
        a CID of `_CID_V1_BYTES_LEN` bytes followed by a DAG-CBOR payload.

        Note:
            You could pass as `data` the response of `client.com.atproto.sync.get_repo`, for example,
            as well as responses of other methods in the `sync` namespace.

        Example:
            >>> from atproto import CAR, Client
            >>> client = Client()
            >>> client.login('my-handle', 'my-password')
            >>> repo = client.com.atproto.sync.get_repo({'did': client.me.did})
            >>> car_file = CAR.from_bytes(repo)
            >>> print(car_file.root)
            >>> print(car_file.nodes)

        Args:
            data: content of the file.

        Returns:
            :obj:`atproto.CAR`: Parsed CAR file.
        """
        stream = BytesIO(data)
        total_size = len(data)
        cid_size = CAR._CID_V1_BYTES_LEN

        # Header: LEB128 length prefix, then a DAG-CBOR map; only the first root is kept.
        header_size, _ = leb128.u.decode_reader(stream)
        header = dag_cbor.decode(stream.read(header_size))
        first_root = header.get('roots')[0]

        decoded_blocks = {}
        while stream.tell() != total_size:
            # The length prefix covers the CID and the payload together.
            section_size, _ = leb128.u.decode_reader(stream)
            block_cid = CID.decode(stream.read(cid_size))
            decoded_blocks[block_cid] = dag_cbor.decode(stream.read(section_size - cid_size))

        return cls(root=first_root, nodes=decoded_blocks)
25 changes: 2 additions & 23 deletions atproto/cid/__init__.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,8 @@
from typing import Union
from multiformats import CID as _CID

from multiformats import CID as MCID

# TODO(MarshalX): Implement more methods


class CID:
class CID(_CID):
"""CID (Content IDentifier).

Hash for Merkle Search Tree (MST).
"""

def __init__(self, cid: MCID):
self._cid = cid

def encode(self) -> str:
"""Encodes the CID."""
return self._cid.encode()

@classmethod
def decode(cls, cid: Union[str, bytes]) -> 'CID':
"""Decodes a CID from str or bytes."""
return cls(MCID.decode(cid))

@property
def version(self) -> int:
"""Get CID version."""
return self._cid.version
91 changes: 91 additions & 0 deletions atproto/leb128/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
"""
Original source code: https://github.com/mohanson/leb128

https://en.wikipedia.org/wiki/LEB128

LEB128 or Little Endian Base 128 is a form of variable-length code
compression used to store an arbitrarily large integer in a small number of
bytes. LEB128 is used in the DWARF debug file format and the WebAssembly
binary encoding for all integer literals.
"""

import typing


class _U:
@staticmethod
def encode(i: int) -> bytearray:
"""Encode the int i using unsigned leb128 and return the encoded bytearray."""
assert i >= 0
r = []
while True:
byte = i & 0x7F
i = i >> 7
if i == 0:
r.append(byte)
return bytearray(r)
r.append(0x80 | byte)

@staticmethod
def decode(b: bytearray) -> int:
"""Decode the unsigned leb128 encoded bytearray"""
r = 0
for i, e in enumerate(b):
r = r + ((e & 0x7F) << (i * 7))
return r

@staticmethod
def decode_reader(r: typing.BinaryIO) -> (int, int):
"""
Decode the unsigned leb128 encoded from a reader, it will return two values, the actual number and the number
of bytes read.
"""
a = bytearray()
while True:
b = ord(r.read(1))
a.append(b)
if (b & 0x80) == 0:
break
return _U.decode(a), len(a)


class _I:
@staticmethod
def encode(i: int) -> bytearray:
"""Encode the int i using signed leb128 and return the encoded bytearray."""
r = []
while True:
byte = i & 0x7F
i = i >> 7
if (i == 0 and byte & 0x40 == 0) or (i == -1 and byte & 0x40 != 0):
r.append(byte)
return bytearray(r)
r.append(0x80 | byte)

@staticmethod
def decode(b: bytearray) -> int:
"""Decode the signed leb128 encoded bytearray"""
r = 0
for i, e in enumerate(b):
r = r + ((e & 0x7F) << (i * 7))
if e & 0x40 != 0:
r |= -(1 << (i * 7) + 7)
return r

@staticmethod
def decode_reader(r: typing.BinaryIO) -> (int, int):
"""
Decode the signed leb128 encoded from a reader, it will return two values, the actual number and the number
of bytes read.
"""
a = bytearray()
while True:
b = ord(r.read(1))
a.append(b)
if (b & 0x80) == 0:
break
return _I.decode(a), len(a)


# Module-level codec instances: `u` is the unsigned codec and `i` is the signed one.
u, i = _U(), _I()
7 changes: 7 additions & 0 deletions docs/source/atproto/atproto.car.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
atproto.car
===========

.. automodule:: atproto.car
:members:
:undoc-members:
:show-inheritance:
7 changes: 7 additions & 0 deletions docs/source/atproto/atproto.leb128.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
atproto.leb128
==============

.. automodule:: atproto.leb128
:members:
:undoc-members:
:show-inheritance:
2 changes: 2 additions & 0 deletions docs/source/atproto/atproto.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@ Subpackages
.. toctree::
:maxdepth: 4

atproto.car
atproto.cid
atproto.cli
atproto.codegen
atproto.leb128
atproto.lexicon
atproto.nsid
atproto.uri
Expand Down
1 change: 1 addition & 0 deletions docs/source/atproto/atproto.xrpc_client.models.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,5 @@ Submodules

atproto.xrpc_client.models.base
atproto.xrpc_client.models.blob_ref
atproto.xrpc_client.models.type_conversion
atproto.xrpc_client.models.utils
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
type\_conversion
============================================

.. automodule:: atproto.xrpc_client.models.type_conversion
:members:
:undoc-members:
:show-inheritance:
7 changes: 7 additions & 0 deletions docs/source/car.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
CAR
===

.. automodule:: atproto.car
:members:
:undoc-members:
:show-inheritance:
1 change: 1 addition & 0 deletions docs/source/cid.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,4 @@ CID
:members:
:undoc-members:
:show-inheritance:
:inherited-members:
1 change: 1 addition & 0 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ Documentation
nsid
cid
uri
car
exceptions

.. toctree::
Expand Down
19 changes: 18 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ httpx = "0.24.0"
dacite = "1.8.0"
multiformats = "0.2.1"
typing-extensions = "4.5.0"
dag-cbor = "0.3.2"

[tool.poetry.dev-dependencies]

Expand Down
10 changes: 9 additions & 1 deletion test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import logging
import os

from atproto import AsyncClient, AtUri, Client, models
from atproto import CAR, AsyncClient, AtUri, Client, models

# logging.basicConfig(level=logging.DEBUG)
logging.basicConfig(level=logging.INFO)
Expand Down Expand Up @@ -31,6 +31,14 @@ def sync_main():
client = Client()
client.login(os.environ['USERNAME'], os.environ['PASSWORD'])

repo = client.com.atproto.sync.get_repo({'did': client.me.did})
car_file = CAR.from_bytes(repo)
print(car_file.root)
print(car_file.nodes)

# res = client.com.atproto.repo.get_record(...) # implement by yourself
# also you need to parse "res.value" as profile record using get_or_create_model method

# search_result = client.bsky.actor.search_actors_typeahead({'term': 'marshal'})
# for actor in search_result.actors:
# print(actor.handle, actor.displayName)
Expand Down