From 8bb31d8ac6547dd3bac94b0147b81829de46d534 Mon Sep 17 00:00:00 2001
From: fracape
Date: Fri, 8 Dec 2023 19:30:26 +0000
Subject: [PATCH] deploy: 65a0b4f778c2f083356986ef7d8e4b6647e5fb15

---
 tutorials/cli_tutorial_2.html    |   2 +-
 tutorials/cli_tutorial_2_nb.html |   2 +-
 tutorials/cli_tutorial_3.html    |   2 +-
 tutorials/cli_tutorial_3_nb.html |   2 +-
 tutorials/cli_tutorial_6.html    |  78 ++++-----
 tutorials/cli_tutorial_6_nb.html |  78 ++++-----
 tutorials/cli_tutorial_7.html    |  64 ++++----
 tutorials/cli_tutorial_7_nb.html |  64 ++++----
 tutorials/convert_nb.html        | 132 ++++++++--------
 tutorials/detectron2.html        | 188 +++++++++++-----------
 tutorials/detectron2_nb.html     | 188 +++++++++++-----------
 tutorials/download.html          |  92 +++++------
 tutorials/download_nb.html       |  92 +++++------
 tutorials/encdec.html            |  74 ++++-----
 tutorials/encdec_nb.html         |  74 ++++-----
 tutorials/evaluate.html          | 264 +++++++++++++++----------------
 tutorials/evaluate_nb.html       | 264 +++++++++++++++----------------
 tutorials/fiftyone.html          |  38 ++---
 tutorials/fiftyone_nb.html       |  38 ++---
 tutorials/index.html             | 132 ++++++++--------
 20 files changed, 934 insertions(+), 934 deletions(-)

diff --git a/tutorials/cli_tutorial_2.html b/tutorials/cli_tutorial_2.html
index b37ddca0..26a31b53 100644
--- a/tutorials/cli_tutorial_2.html
+++ b/tutorials/cli_tutorial_2.html
@@ -799,7 +799,7 @@

2. Registering Datasets

Let’s remove the image data as well:

-
rm -rf /tmp/my_data_set
+
rm -rf /tmp/my_data_set
 

A final note/observation before moving to the next tutorial.

diff --git a/tutorials/cli_tutorial_2_nb.html b/tutorials/cli_tutorial_2_nb.html index 3e1874f7..63a23890 100644 --- a/tutorials/cli_tutorial_2_nb.html +++ b/tutorials/cli_tutorial_2_nb.html @@ -805,7 +805,7 @@

Contents

Let’s remove the image data as well:

-
rm -rf /tmp/my_data_set
+
rm -rf /tmp/my_data_set
 

A final note/observation before moving to the next tutorial.

diff --git a/tutorials/cli_tutorial_3.html b/tutorials/cli_tutorial_3.html index 105f1821..0ba11c81 100644 --- a/tutorials/cli_tutorial_3.html +++ b/tutorials/cli_tutorial_3.html @@ -364,7 +364,7 @@

3. MPEG-VCM Evaluation

oiv6-mpeg-detection-v1

  • oiv6-mpeg-segmentation-v1

  • -
    compressai-vision list
    +
    compressai-vision list
     
    importing fiftyone
    diff --git a/tutorials/cli_tutorial_3_nb.html b/tutorials/cli_tutorial_3_nb.html
    index 75a3b439..df9e4ef3 100644
    --- a/tutorials/cli_tutorial_3_nb.html
    +++ b/tutorials/cli_tutorial_3_nb.html
    @@ -370,7 +370,7 @@ 

    Contents

  • oiv6-mpeg-detection-v1

  • oiv6-mpeg-segmentation-v1

  • -
    compressai-vision list
    +
    compressai-vision list
     
    importing fiftyone
    diff --git a/tutorials/cli_tutorial_6.html b/tutorials/cli_tutorial_6.html
    index 78d14d60..4cf786b4 100644
    --- a/tutorials/cli_tutorial_6.html
    +++ b/tutorials/cli_tutorial_6.html
    @@ -362,15 +362,15 @@ 

6. VTM benchmark generation

We use the vtm subcommand to manage, encode and cache the VTM-produced bitstreams on disk.

    Let’s generate some encoded bitstreams.

    -
    compressai-vision vtm --y --dataset-name=oiv6-mpeg-detection-v1 \
    ---slice=0:2 \
    ---scale=100 \
    ---progress=1 \
    ---qpars=47 \
    ---vtm_cache=/tmp/bitstreams \
    ---vtm_dir={path_to_vtm_software}/bin \
    ---vtm_cfg={path_to_vtm_software}/cfg/encoder_intra_vtm.cfg \
    ---output=vtm_out.json
    +
    compressai-vision vtm --y --dataset-name=oiv6-mpeg-detection-v1 \
    +--slice=0:2 \
    +--scale=100 \
    +--progress=1 \
    +--qpars=47 \
    +--vtm_cache=/tmp/bitstreams \
    +--vtm_dir={path_to_vtm_software}/bin \
    +--vtm_cfg={path_to_vtm_software}/cfg/encoder_intra_vtm.cfg \
    +--output=vtm_out.json
     
    importing fiftyone
    @@ -403,15 +403,15 @@ 

6. VTM benchmark generation

As you can see, bitstreams were generated and cached into /tmp/bitstreams/SCALE/QP. Let’s see what happens if we run the exact same command again:

    -
    compressai-vision vtm --y --dataset-name=oiv6-mpeg-detection-v1 \
    ---slice=0:2 \
    ---scale=100 \
    ---progress=1 \
    ---qpars=47 \
    ---vtm_cache=/tmp/bitstreams \
    ---vtm_dir={path_to_vtm_software}/bin \
    ---vtm_cfg={path_to_vtm_software}/cfg/encoder_intra_vtm.cfg \
    ---output=vtm_out.json
    +
    compressai-vision vtm --y --dataset-name=oiv6-mpeg-detection-v1 \
    +--slice=0:2 \
    +--scale=100 \
    +--progress=1 \
    +--qpars=47 \
    +--vtm_cache=/tmp/bitstreams \
    +--vtm_dir={path_to_vtm_software}/bin \
    +--vtm_cfg={path_to_vtm_software}/cfg/encoder_intra_vtm.cfg \
    +--output=vtm_out.json
     
    importing fiftyone
    @@ -442,19 +442,19 @@ 

    6. VTM benchmark generation
    echo " " > /tmp/bitstreams/100/47/bin_000a1249af2bc5f0
    +
    echo " " > /tmp/bitstreams/100/47/bin_000a1249af2bc5f0
     

    And run the command again:

    -
    compressai-vision vtm --y --dataset-name=oiv6-mpeg-detection-v1 \
    ---slice=0:2 \
    ---scale=100 \
    ---progress=1 \
    ---qpars=47 \
    ---vtm_cache=/tmp/bitstreams \
    ---vtm_dir={path_to_vtm_software}/bin \
    ---vtm_cfg={path_to_vtm_software}/cfg/encoder_intra_vtm.cfg \
    ---output=vtm_out.json
    +
    compressai-vision vtm --y --dataset-name=oiv6-mpeg-detection-v1 \
    +--slice=0:2 \
    +--scale=100 \
    +--progress=1 \
    +--qpars=47 \
    +--vtm_cache=/tmp/bitstreams \
    +--vtm_dir={path_to_vtm_software}/bin \
    +--vtm_cfg={path_to_vtm_software}/cfg/encoder_intra_vtm.cfg \
    +--output=vtm_out.json
     
    importing fiftyone
    @@ -493,17 +493,17 @@ 

6. VTM benchmark generation

Finally, you can run detectron2-eval for the VTM case like this:

    -
    compressai-vision detectron2-eval --y --dataset-name=oiv6-mpeg-detection-v1 \
    ---slice=0:2 \
    ---scale=100 \
    ---progress=1 \
    ---qpars=47 \
    ---vtm \
    ---vtm_cache=/tmp/bitstreams \
    ---vtm_dir={path_to_vtm_software}/bin \
    ---vtm_cfg={path_to_vtm_software}/cfg/encoder_intra_vtm.cfg \
    ---output=detectron2_vtm.json \
    ---model=COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml
    +
    compressai-vision detectron2-eval --y --dataset-name=oiv6-mpeg-detection-v1 \
    +--slice=0:2 \
    +--scale=100 \
    +--progress=1 \
    +--qpars=47 \
    +--vtm \
    +--vtm_cache=/tmp/bitstreams \
    +--vtm_dir={path_to_vtm_software}/bin \
    +--vtm_cfg={path_to_vtm_software}/cfg/encoder_intra_vtm.cfg \
    +--output=detectron2_vtm.json \
    +--model=COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml
     
    importing fiftyone
    diff --git a/tutorials/cli_tutorial_6_nb.html b/tutorials/cli_tutorial_6_nb.html
    index c29f4f33..492151b9 100644
    --- a/tutorials/cli_tutorial_6_nb.html
    +++ b/tutorials/cli_tutorial_6_nb.html
    @@ -368,15 +368,15 @@ 

    Contents

We use the vtm subcommand to manage, encode and cache the VTM-produced bitstreams on disk.

    Let’s generate some encoded bitstreams.

    -
    compressai-vision vtm --y --dataset-name=oiv6-mpeg-detection-v1 \
    ---slice=0:2 \
    ---scale=100 \
    ---progress=1 \
    ---qpars=47 \
    ---vtm_cache=/tmp/bitstreams \
    ---vtm_dir={path_to_vtm_software}/bin \
    ---vtm_cfg={path_to_vtm_software}/cfg/encoder_intra_vtm.cfg \
    ---output=vtm_out.json
    +
    compressai-vision vtm --y --dataset-name=oiv6-mpeg-detection-v1 \
    +--slice=0:2 \
    +--scale=100 \
    +--progress=1 \
    +--qpars=47 \
    +--vtm_cache=/tmp/bitstreams \
    +--vtm_dir={path_to_vtm_software}/bin \
    +--vtm_cfg={path_to_vtm_software}/cfg/encoder_intra_vtm.cfg \
    +--output=vtm_out.json
     
    importing fiftyone
    @@ -409,15 +409,15 @@ 

    Contents

As you can see, bitstreams were generated and cached into /tmp/bitstreams/SCALE/QP. Let’s see what happens if we run the exact same command again:

    -
    compressai-vision vtm --y --dataset-name=oiv6-mpeg-detection-v1 \
    ---slice=0:2 \
    ---scale=100 \
    ---progress=1 \
    ---qpars=47 \
    ---vtm_cache=/tmp/bitstreams \
    ---vtm_dir={path_to_vtm_software}/bin \
    ---vtm_cfg={path_to_vtm_software}/cfg/encoder_intra_vtm.cfg \
    ---output=vtm_out.json
    +
    compressai-vision vtm --y --dataset-name=oiv6-mpeg-detection-v1 \
    +--slice=0:2 \
    +--scale=100 \
    +--progress=1 \
    +--qpars=47 \
    +--vtm_cache=/tmp/bitstreams \
    +--vtm_dir={path_to_vtm_software}/bin \
    +--vtm_cfg={path_to_vtm_software}/cfg/encoder_intra_vtm.cfg \
    +--output=vtm_out.json
     
    importing fiftyone
    @@ -448,19 +448,19 @@ 

    Contents

    Instead of generating the bitstreams, the program found them cached on the disk and just verified them.

    Let’s fool around and corrupt one of the bitstreams:

    -
    echo " " > /tmp/bitstreams/100/47/bin_000a1249af2bc5f0
    +
    echo " " > /tmp/bitstreams/100/47/bin_000a1249af2bc5f0
     

    And run the command again:

    -
    compressai-vision vtm --y --dataset-name=oiv6-mpeg-detection-v1 \
    ---slice=0:2 \
    ---scale=100 \
    ---progress=1 \
    ---qpars=47 \
    ---vtm_cache=/tmp/bitstreams \
    ---vtm_dir={path_to_vtm_software}/bin \
    ---vtm_cfg={path_to_vtm_software}/cfg/encoder_intra_vtm.cfg \
    ---output=vtm_out.json
    +
    compressai-vision vtm --y --dataset-name=oiv6-mpeg-detection-v1 \
    +--slice=0:2 \
    +--scale=100 \
    +--progress=1 \
    +--qpars=47 \
    +--vtm_cache=/tmp/bitstreams \
    +--vtm_dir={path_to_vtm_software}/bin \
    +--vtm_cfg={path_to_vtm_software}/cfg/encoder_intra_vtm.cfg \
    +--output=vtm_out.json
     
    importing fiftyone
    @@ -499,17 +499,17 @@ 

    Contents

    of crashes / data corruption, you can just send the same scripts into your queue system over and over again if necessary.

    Finally, you can run detectron2-eval for the VTM case like this:

    -
    compressai-vision detectron2-eval --y --dataset-name=oiv6-mpeg-detection-v1 \
    ---slice=0:2 \
    ---scale=100 \
    ---progress=1 \
    ---qpars=47 \
    ---vtm \
    ---vtm_cache=/tmp/bitstreams \
    ---vtm_dir={path_to_vtm_software}/bin \
    ---vtm_cfg={path_to_vtm_software}/cfg/encoder_intra_vtm.cfg \
    ---output=detectron2_vtm.json \
    ---model=COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml
    +
    compressai-vision detectron2-eval --y --dataset-name=oiv6-mpeg-detection-v1 \
    +--slice=0:2 \
    +--scale=100 \
    +--progress=1 \
    +--qpars=47 \
    +--vtm \
    +--vtm_cache=/tmp/bitstreams \
    +--vtm_dir={path_to_vtm_software}/bin \
    +--vtm_cfg={path_to_vtm_software}/cfg/encoder_intra_vtm.cfg \
    +--output=detectron2_vtm.json \
    +--model=COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml
     
    importing fiftyone
    diff --git a/tutorials/cli_tutorial_7.html b/tutorials/cli_tutorial_7.html
    index b3a68911..7085cf5e 100644
    --- a/tutorials/cli_tutorial_7.html
    +++ b/tutorials/cli_tutorial_7.html
    @@ -542,17 +542,17 @@ 

    7. Importing and Using Video
    import cv2
    -import matplotlib.pyplot as plt
    -import fiftyone as fo
    -from fiftyone import ViewField as F
    -from math import floor
    +
    import cv2
    +import matplotlib.pyplot as plt
    +import fiftyone as fo
    +from fiftyone import ViewField as F
    +from math import floor
     
    -
    dataset=fo.load_dataset("sfu-hw-objects-v1")
    +
    dataset=fo.load_dataset("sfu-hw-objects-v1")
     
    -
    dataset
    +
    dataset
     
    Name:        sfu-hw-objects-v1
    @@ -577,7 +577,7 @@ 

    7. Importing and Using Video
    dataset.first()
    +
    dataset.first()
     
    <Sample: {
    @@ -599,12 +599,12 @@ 

7. Importing and Using Video

class_tag corresponds to the class directories (ClassA, ClassB, etc.), while name_tag to the video descriptive names (BasketballDrill, Traffic, PeopleOnStreet, etc.). Let’s pick a certain video sample:

    -
    sample = dataset[ (F("name_tag") == "BasketballDrill") & (F("class_tag") == "ClassC") ].first()
    +
    sample = dataset[ (F("name_tag") == "BasketballDrill") & (F("class_tag") == "ClassC") ].first()
     

    Take a look at the first frame ground truth detections (note that frame indices start from 1):

    -
    sample.frames[1]
    +
    sample.frames[1]
     
    <FrameView: {
    @@ -738,39 +738,39 @@ 

    7. Importing and Using Video
    vid=cv2.VideoCapture(sample.filepath)
    +
    vid=cv2.VideoCapture(sample.filepath)
     
    -
    print("number of frames:",int(vid.get(cv2.CAP_PROP_FRAME_COUNT)))
    +
    print("number of frames:",int(vid.get(cv2.CAP_PROP_FRAME_COUNT)))
     
    number of frames: 501
     

    Let’s define a small helper function:

    -
    def draw_detections(sample: fo.Sample, vid: cv2.VideoCapture, nframe: int):
    -    nmax=int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
    -    if nframe > nmax:
    -        raise AssertionError("max frame is " + str(nmax))
    -    ok = vid.set(cv2.CAP_PROP_POS_FRAMES, nframe-1)
    -    if not ok:
    -        raise AssertionError("seek failed")
    -    ok, arr = vid.read() # BGR image in arr
    -    if not ok:
    -        raise AssertionError("no image")
    -    for detection in sample.frames[nframe].detections.detections:
    -        x0, y0, w, h = detection.bounding_box # rel coords
    -        x1, y1, x2, y2 = floor(x0*arr.shape[1]), floor(y0*arr.shape[0]), floor((x0+w)*arr.shape[1]), floor((y0+h)*arr.shape[0])
    -        arr=cv2.rectangle(arr, (x1, y1), (x2, y2), (255, 0, 0), 5)
    -    return arr
    +
    def draw_detections(sample: fo.Sample, vid: cv2.VideoCapture, nframe: int):
    +    nmax=int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
    +    if nframe > nmax:
    +        raise AssertionError("max frame is " + str(nmax))
    +    ok = vid.set(cv2.CAP_PROP_POS_FRAMES, nframe-1)
    +    if not ok:
    +        raise AssertionError("seek failed")
    +    ok, arr = vid.read() # BGR image in arr
    +    if not ok:
    +        raise AssertionError("no image")
    +    for detection in sample.frames[nframe].detections.detections:
    +        x0, y0, w, h = detection.bounding_box # rel coords
    +        x1, y1, x2, y2 = floor(x0*arr.shape[1]), floor(y0*arr.shape[0]), floor((x0+w)*arr.shape[1]), floor((y0+h)*arr.shape[0])
    +        arr=cv2.rectangle(arr, (x1, y1), (x2, y2), (255, 0, 0), 5)
    +    return arr
     
    -
    img=draw_detections(sample, vid, 200)
    -img_ = img[:,:,::-1] # BGR -> RGB
    +
    img=draw_detections(sample, vid, 200)
    +img_ = img[:,:,::-1] # BGR -> RGB
     
    -
    plt.imshow(img_)
    -vid.release()
    +
    plt.imshow(img_)
    +vid.release()
     
    ../_images/cli_tutorial_7_nb_22_0.png @@ -848,7 +848,7 @@

    7. Importing and Using Video
    cat detectron2_test.json
    +
    cat detectron2_test.json
     
    {
    diff --git a/tutorials/cli_tutorial_7_nb.html b/tutorials/cli_tutorial_7_nb.html
    index 42d01064..d848df8a 100644
    --- a/tutorials/cli_tutorial_7_nb.html
    +++ b/tutorials/cli_tutorial_7_nb.html
    @@ -548,17 +548,17 @@ 

    Contents

In order to demonstrate how video datasets are used, let’s continue in a Python notebook:

    -
    import cv2
    -import matplotlib.pyplot as plt
    -import fiftyone as fo
    -from fiftyone import ViewField as F
    -from math import floor
    +
    import cv2
    +import matplotlib.pyplot as plt
    +import fiftyone as fo
    +from fiftyone import ViewField as F
    +from math import floor
     
    -
    dataset=fo.load_dataset("sfu-hw-objects-v1")
    +
    dataset=fo.load_dataset("sfu-hw-objects-v1")
     
    -
    dataset
    +
    dataset
     
    Name:        sfu-hw-objects-v1
    @@ -583,7 +583,7 @@ 

    Contents

    In contrast to image datasets where each sample was an image, now a sample corresponds to a video:

    -
    dataset.first()
    +
    dataset.first()
     
    <Sample: {
    @@ -605,12 +605,12 @@ 

    Contents

class_tag corresponds to the class directories (ClassA, ClassB, etc.), while name_tag to the video descriptive names (BasketballDrill, Traffic, PeopleOnStreet, etc.). Let’s pick a certain video sample:

    -
    sample = dataset[ (F("name_tag") == "BasketballDrill") & (F("class_tag") == "ClassC") ].first()
    +
    sample = dataset[ (F("name_tag") == "BasketballDrill") & (F("class_tag") == "ClassC") ].first()
     

    Take a look at the first frame ground truth detections (note that frame indices start from 1):

    -
    sample.frames[1]
    +
    sample.frames[1]
     
    <FrameView: {
    @@ -744,39 +744,39 @@ 

    Contents

    Start reading the video file with OpenCV:

    -
    vid=cv2.VideoCapture(sample.filepath)
    +
    vid=cv2.VideoCapture(sample.filepath)
     
    -
    print("number of frames:",int(vid.get(cv2.CAP_PROP_FRAME_COUNT)))
    +
    print("number of frames:",int(vid.get(cv2.CAP_PROP_FRAME_COUNT)))
     
    number of frames: 501
     

    Let’s define a small helper function:

    -
    def draw_detections(sample: fo.Sample, vid: cv2.VideoCapture, nframe: int):
    -    nmax=int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
    -    if nframe > nmax:
    -        raise AssertionError("max frame is " + str(nmax))
    -    ok = vid.set(cv2.CAP_PROP_POS_FRAMES, nframe-1)
    -    if not ok:
    -        raise AssertionError("seek failed")
    -    ok, arr = vid.read() # BGR image in arr
    -    if not ok:
    -        raise AssertionError("no image")
    -    for detection in sample.frames[nframe].detections.detections:
    -        x0, y0, w, h = detection.bounding_box # rel coords
    -        x1, y1, x2, y2 = floor(x0*arr.shape[1]), floor(y0*arr.shape[0]), floor((x0+w)*arr.shape[1]), floor((y0+h)*arr.shape[0])
    -        arr=cv2.rectangle(arr, (x1, y1), (x2, y2), (255, 0, 0), 5)
    -    return arr
    +
    def draw_detections(sample: fo.Sample, vid: cv2.VideoCapture, nframe: int):
    +    nmax=int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
    +    if nframe > nmax:
    +        raise AssertionError("max frame is " + str(nmax))
    +    ok = vid.set(cv2.CAP_PROP_POS_FRAMES, nframe-1)
    +    if not ok:
    +        raise AssertionError("seek failed")
    +    ok, arr = vid.read() # BGR image in arr
    +    if not ok:
    +        raise AssertionError("no image")
    +    for detection in sample.frames[nframe].detections.detections:
    +        x0, y0, w, h = detection.bounding_box # rel coords
    +        x1, y1, x2, y2 = floor(x0*arr.shape[1]), floor(y0*arr.shape[0]), floor((x0+w)*arr.shape[1]), floor((y0+h)*arr.shape[0])
    +        arr=cv2.rectangle(arr, (x1, y1), (x2, y2), (255, 0, 0), 5)
    +    return arr
     
    -
    img=draw_detections(sample, vid, 200)
    -img_ = img[:,:,::-1] # BGR -> RGB
    +
    img=draw_detections(sample, vid, 200)
    +img_ = img[:,:,::-1] # BGR -> RGB
     
    -
    plt.imshow(img_)
    -vid.release()
    +
    plt.imshow(img_)
    +vid.release()
     
    ../_images/cli_tutorial_7_nb_22_0.png @@ -854,7 +854,7 @@

    Contents

    Take a look at the results:

    -
    cat detectron2_test.json
    +
    cat detectron2_test.json
     
    {
    diff --git a/tutorials/convert_nb.html b/tutorials/convert_nb.html
    index 2c708ea6..aa3af550 100644
    --- a/tutorials/convert_nb.html
    +++ b/tutorials/convert_nb.html
    @@ -348,42 +348,42 @@ 

    Contents

    In this chapter, we create an evaluation dataset as defined by the MPEG-VCM working group

    -
    # common libs
    -import math, os, io, json, cv2, random, logging
    -import numpy as np
    -# images
    -from PIL import Image
    -import matplotlib.pyplot as plt
    +
    # common libs
    +import math, os, io, json, cv2, random, logging
    +import numpy as np
    +# images
    +from PIL import Image
    +import matplotlib.pyplot as plt
     
    -
    homie=os.path.expanduser("~")
    -print("your home path is", homie)
    -fodir=os.path.join(homie,'fiftyone')
    -print("fiftyone dowloads data by default to", fodir)
    -try:
    -    os.mkdir(fodir)
    -except FileExistsError:
    -    pass
    +
    homie=os.path.expanduser("~")
    +print("your home path is", homie)
    +fodir=os.path.join(homie,'fiftyone')
    +print("fiftyone dowloads data by default to", fodir)
    +try:
    +    os.mkdir(fodir)
    +except FileExistsError:
    +    pass
     
    your home path is /home/sampsa
     fiftyone dowloads data by default to /home/sampsa/fiftyone
     
    -
    # fiftyone
    -import fiftyone as fo
    -import fiftyone.zoo as foz
    +
    # fiftyone
    +import fiftyone as fo
    +import fiftyone.zoo as foz
     
    -
    # CompressAI-Vision
    -from compressai_vision.conversion import MPEGVCMToOpenImageV6, imageIdFileList
    +
    # CompressAI-Vision
    +from compressai_vision.conversion import MPEGVCMToOpenImageV6, imageIdFileList
     

We expect that you have downloaded the correct images and segmentation masks into the open-images-v6 folder (as instructed in the previous chapter)

    -
    dir_=os.path.join(fodir,"open-images-v6")
    -print("contents of", dir_,":")
    -!tree --filelimit=10 $dir_ | cat
    +
    dir_=os.path.join(fodir,"open-images-v6")
    +print("contents of", dir_,":")
    +!tree --filelimit=10 $dir_ | cat
     
    contents of /home/sampsa/fiftyone/open-images-v6 :
    @@ -418,36 +418,36 @@ 

    Contents

    detection_validation_input_5k.lst = list of images used
    -
    # TODO: define path_to_mpeg_vcm_files
    -path_to_images=os.path.join(fodir,"open-images-v6/validation/data")
    +
    # TODO: define path_to_mpeg_vcm_files
    +path_to_images=os.path.join(fodir,"open-images-v6/validation/data")
     
    -list_file=os.path.join(path_to_mpeg_vcm_files, "detection_validation_input_5k.lst")
    -bbox_csv_file=os.path.join(path_to_mpeg_vcm_files, "detection_validation_5k_bbox.csv")
    -validation_csv_file=os.path.join(path_to_mpeg_vcm_files, "detection_validation_labels_5k.csv")
    +list_file=os.path.join(path_to_mpeg_vcm_files, "detection_validation_input_5k.lst")
    +bbox_csv_file=os.path.join(path_to_mpeg_vcm_files, "detection_validation_5k_bbox.csv")
    +validation_csv_file=os.path.join(path_to_mpeg_vcm_files, "detection_validation_labels_5k.csv")
     
    -assert(os.path.exists(bbox_csv_file)), "can't find bbox file"
    -assert(os.path.exists(validation_csv_file)), "can't find labels file"
    -assert(os.path.exists(path_to_images)), "can't find image directory"
    +assert(os.path.exists(bbox_csv_file)), "can't find bbox file"
    +assert(os.path.exists(validation_csv_file)), "can't find labels file"
    +assert(os.path.exists(path_to_images)), "can't find image directory"
     

Now we convert the MPEG-VCM proprietary annotation format into a proper OpenImageV6-format dataset and place it in ~/fiftyone/mpeg-vcm-detection

    First, remove any previously imported stuff:

    -
    !rm -rf ~/fiftyone/mpeg-vcm-*
    +
    !rm -rf ~/fiftyone/mpeg-vcm-*
     
    -
    MPEGVCMToOpenImageV6(
    -    validation_csv_file=validation_csv_file,
    -    list_file=list_file,
    -    bbox_csv_file=bbox_csv_file,
    -    output_directory=os.path.join(fodir,"mpeg-vcm-detection"),
    -    data_dir=path_to_images
    -)
    +
    MPEGVCMToOpenImageV6(
    +    validation_csv_file=validation_csv_file,
    +    list_file=list_file,
    +    bbox_csv_file=bbox_csv_file,
    +    output_directory=os.path.join(fodir,"mpeg-vcm-detection"),
    +    data_dir=path_to_images
    +)
     

Let’s see what we got:

    -
    !tree --filelimit=10 ~/fiftyone/mpeg-vcm-detection | cat
    +
    !tree --filelimit=10 ~/fiftyone/mpeg-vcm-detection | cat
     
    /home/sampsa/fiftyone/mpeg-vcm-detection
    @@ -469,46 +469,46 @@ 

    Contents

    data -> ~/fiftyone/open-images-v6/validation/data)

The only thing left to do is to register this OpenImageV6-formatted dataset with fiftyone:

    -
    # remove the dataset in the case it was already registered in fiftyone
    -try:
    -    fo.delete_dataset("mpeg-vcm-detection")
    -except ValueError as e:
    -    print("could not delete because of", e)
    +
    # remove the dataset in the case it was already registered in fiftyone
    +try:
    +    fo.delete_dataset("mpeg-vcm-detection")
    +except ValueError as e:
    +    print("could not delete because of", e)
     
    -
    dataset_type = fo.types.OpenImagesV6Dataset
    -dataset_dir = os.path.join(fodir,"mpeg-vcm-detection")
    -dataset = fo.Dataset.from_dir(
    -    dataset_dir=dataset_dir,
    -    dataset_type=dataset_type,
    -    label_types=("detections","classifications"),
    -    load_hierarchy=False,
    -    name="mpeg-vcm-detection",
    -    image_ids=imageIdFileList(list_file)
    -)
    +
    dataset_type = fo.types.OpenImagesV6Dataset
    +dataset_dir = os.path.join(fodir,"mpeg-vcm-detection")
    +dataset = fo.Dataset.from_dir(
    +    dataset_dir=dataset_dir,
    +    dataset_type=dataset_type,
    +    label_types=("detections","classifications"),
    +    load_hierarchy=False,
    +    name="mpeg-vcm-detection",
    +    image_ids=imageIdFileList(list_file)
    +)
     
    100% |███████████████| 5000/5000 [16.8s elapsed, 0s remaining, 290.4 samples/s]
     
    -
dataset.persistent=True # without this, your database will disappear!
    +
dataset.persistent=True # without this, your database will disappear!
     
    -
    ## now, in the future, just do
    -dataset = fo.load_dataset("mpeg-vcm-detection")
    +
    ## now, in the future, just do
    +dataset = fo.load_dataset("mpeg-vcm-detection")
     

Finally, let’s also create a dummy dataset, with only one sample, for debugging and testing:

    -
    try:
    -    fo.delete_dataset("mpeg-vcm-detection-dummy")
    -except ValueError:
    -    print("no dummmy dataset yet..")
    -dummy_dataset=fo.Dataset("mpeg-vcm-detection-dummy")
    -for sample in dataset[0:1]:
    -    dummy_dataset.add_sample(sample)
    -dummy_dataset.persistent=True
    -print("dummy dataset ok")
    +
    try:
    +    fo.delete_dataset("mpeg-vcm-detection-dummy")
    +except ValueError:
    +    print("no dummmy dataset yet..")
    +dummy_dataset=fo.Dataset("mpeg-vcm-detection-dummy")
    +for sample in dataset[0:1]:
    +    dummy_dataset.add_sample(sample)
    +dummy_dataset.persistent=True
    +print("dummy dataset ok")
     
    dummy dataset ok
    diff --git a/tutorials/detectron2.html b/tutorials/detectron2.html
    index c06c6e23..ce77b478 100644
    --- a/tutorials/detectron2.html
    +++ b/tutorials/detectron2.html
    @@ -346,90 +346,90 @@ 

    2. Run Detectron2
    # common libs
    -import math, os, io, json, cv2, random, logging, datetime
    -import numpy as np
    -# torch
    -import torch
    -from torchvision import transforms
    -# images
    -from PIL import Image
    -import matplotlib.pyplot as plt
    +
    # common libs
    +import math, os, io, json, cv2, random, logging, datetime
    +import numpy as np
    +# torch
    +import torch
    +from torchvision import transforms
    +# images
    +from PIL import Image
    +import matplotlib.pyplot as plt
     
    -
    # define a helper function
    -def cv2_imshow(img):
    -    img2 = img[:,:,::-1]
    -    plt.figure(figsize=(12, 9))
    -    plt.axis('off')
    -    plt.imshow(img2)
    -    plt.show()
    +
    # define a helper function
    +def cv2_imshow(img):
    +    img2 = img[:,:,::-1]
    +    plt.figure(figsize=(12, 9))
    +    plt.axis('off')
    +    plt.imshow(img2)
    +    plt.show()
     
    -
    ## *** Detectron imports ***
    -import detectron2
    -from detectron2.utils.logger import setup_logger
    -setup_logger()
    +
    ## *** Detectron imports ***
    +import detectron2
    +from detectron2.utils.logger import setup_logger
    +setup_logger()
     
    -# import some common detectron2 utilities
    -from detectron2 import model_zoo
    -from detectron2.engine import DefaultPredictor
    -from detectron2.config import get_cfg
    -from detectron2.utils.visualizer import Visualizer
    -from detectron2.data import MetadataCatalog, DatasetCatalog
    +# import some common detectron2 utilities
    +from detectron2 import model_zoo
    +from detectron2.engine import DefaultPredictor
    +from detectron2.config import get_cfg
    +from detectron2.utils.visualizer import Visualizer
    +from detectron2.data import MetadataCatalog, DatasetCatalog
     
    -
    # CompressAI-Vision
    -from compressai_vision.conversion import FO2DetectronDataset # convert fiftyone dataset to Detectron2 dataset
    -from compressai_vision.conversion import detectron251 # convert Detectron2 results to fiftyone format
    -from compressai_vision.evaluation.fo import annexPredictions # crunch a complete fiftyone dataset through Detectron2 predictor and add the predictions to the fiftyone dataset
    +
    # CompressAI-Vision
    +from compressai_vision.conversion import FO2DetectronDataset # convert fiftyone dataset to Detectron2 dataset
    +from compressai_vision.conversion import detectron251 # convert Detectron2 results to fiftyone format
    +from compressai_vision.evaluation.fo import annexPredictions # crunch a complete fiftyone dataset through Detectron2 predictor and add the predictions to the fiftyone dataset
     
    -
    # fiftyone
    -import fiftyone as fo
    -import fiftyone.zoo as foz
    +
    # fiftyone
    +import fiftyone as fo
    +import fiftyone.zoo as foz
     
    -
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    -print(device)
    +
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    +print(device)
     
    cpu
     
    -
    print("torch:", torch.__version__, "/ cuda:", torch.version.cuda, "/ detectron2:", detectron2.__version__)
    +
    print("torch:", torch.__version__, "/ cuda:", torch.version.cuda, "/ detectron2:", detectron2.__version__)
     
    torch: 1.9.1+cu102 / cuda: 10.2 / detectron2: 0.6
     

Let’s pick the correct Detectron2 model

    -
    ## MODEL A
    -model_name="COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml"
    -## look here:
    -## https://github.com/facebookresearch/detectron2/blob/main/MODEL_ZOO.md#faster-r-cnn
    +
    ## MODEL A
    +model_name="COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml"
    +## look here:
    +## https://github.com/facebookresearch/detectron2/blob/main/MODEL_ZOO.md#faster-r-cnn
     
    -## MODEL B
    -# model_name="COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
    +## MODEL B
    +# model_name="COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
     
    -
    # cfg encapsulates the model architecture & weights, also threshold parameter, metadata, etc.
    -cfg = get_cfg()
    -cfg.MODEL.DEVICE=device
    -# load config from a file:
    -cfg.merge_from_file(model_zoo.get_config_file(model_name))
    -# DO NOT TOUCH THRESHOLD WHEN DOING EVALUATION:
    -# too big a threshold will cut the smallest values & affect the precision(recall) curves & evaluation results
    -# the default value is 0.05
    -# value of 0.01 saturates the results (they don't change at lower values)
    -# cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
    -# get weights
    -cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_name)
    -print("expected input colorspace:", cfg.INPUT.FORMAT)
    -print("loaded datasets:", cfg.DATASETS)
    -model_dataset=cfg.DATASETS.TRAIN[0]
    -print("model was trained with", model_dataset)
    -model_meta=MetadataCatalog.get(model_dataset)
    +
    # cfg encapsulates the model architecture & weights, also threshold parameter, metadata, etc.
    +cfg = get_cfg()
    +cfg.MODEL.DEVICE=device
    +# load config from a file:
    +cfg.merge_from_file(model_zoo.get_config_file(model_name))
    +# DO NOT TOUCH THRESHOLD WHEN DOING EVALUATION:
    +# too big a threshold will cut the smallest values & affect the precision(recall) curves & evaluation results
    +# the default value is 0.05
    +# value of 0.01 saturates the results (they don't change at lower values)
    +# cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
    +# get weights
    +cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_name)
    +print("expected input colorspace:", cfg.INPUT.FORMAT)
    +print("loaded datasets:", cfg.DATASETS)
    +model_dataset=cfg.DATASETS.TRAIN[0]
    +print("model was trained with", model_dataset)
    +model_meta=MetadataCatalog.get(model_dataset)
     
    expected input colorspace: BGR
    @@ -442,15 +442,15 @@ 

    2. Run Detectron2
    predictor = DefaultPredictor(cfg)
    +
    predictor = DefaultPredictor(cfg)
     

Get a handle to a dataset. We will be using the oiv6-mpeg-detection-v1 dataset. Please go through the CLI Tutorials in order to produce this dataset.

    -
    dataset = fo.load_dataset("oiv6-mpeg-detection-v1")
    +
    dataset = fo.load_dataset("oiv6-mpeg-detection-v1")
     
    -
    dataset
    +
    dataset
     
    Name:        oiv6-mpeg-detection-v1
    @@ -470,25 +470,25 @@ 

    2. Run Detectron2
    detectron_dataset=FO2DetectronDataset(fo_dataset=dataset, model_catids=model_meta.thing_classes)
    +
    detectron_dataset=FO2DetectronDataset(fo_dataset=dataset, model_catids=model_meta.thing_classes)
     

    Pick a sample:

    -
    d=detectron_dataset[3]
    +
    d=detectron_dataset[3]
     

    We can visualize that sample also with Detectron2 library tools (although we’d prefer fiftyone with fo.launch_app(dataset)):

    -
    # visualize with Detectron2 tools only
    -img = cv2.imread(d["file_name"])
    -visualizer = Visualizer(img[:, :, ::-1], metadata=model_meta, scale=0.5)
    -out = visualizer.draw_dataset_dict(d)
    -cv2_imshow(out.get_image()[:, :, ::-1])
    +
    # visualize with Detectron2 tools only
    +img = cv2.imread(d["file_name"])
    +visualizer = Visualizer(img[:, :, ::-1], metadata=model_meta, scale=0.5)
    +out = visualizer.draw_dataset_dict(d)
    +cv2_imshow(out.get_image()[:, :, ::-1])
     
    ../_images/detectron2_nb_20_0.png

    Let’s try the Detectron2 predictor:

    -
    res=predictor(img)
    +
    res=predictor(img)
     
    /home/sampsa/silo/interdigital/venv_all/lib/python3.8/site-packages/torch/_tensor.py:575: UserWarning: floor_divide is deprecated, and will be removed in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values.
    @@ -497,10 +497,10 @@ 

    2. Run Detectron2
    dets=detectron251(res, model_catids=model_meta.thing_classes) # process involves going from class indexes (ints) to class labels (strings)
    +
    dets=detectron251(res, model_catids=model_meta.thing_classes) # process involves going from class indexes (ints) to class labels (strings)
     
    -
    dets
    +
    dets
     
    <Detections: {
    @@ -754,16 +754,16 @@ 

2. Run Detectron2

We use the dummy single-sample dataset oiv6-mpeg-detection-v1-dummy created in the CLI tutorials with the compressai-vision import-custom command

    -
    dataset = fo.load_dataset("oiv6-mpeg-detection-v1-dummy")
    +
    dataset = fo.load_dataset("oiv6-mpeg-detection-v1-dummy")
     

Detectron prediction results are saved into the fiftyone (mongodb) database during the run. Let’s define a unique name for the sample field where the detectron results will be saved:

    -
    predictor_field='detectron-predictions'
    +
    predictor_field='detectron-predictions'
     
    -
    annexPredictions(predictors=[predictor], fo_dataset=dataset, predictor_fields=[predictor_field])
    +
    annexPredictions(predictors=[predictor], fo_dataset=dataset, predictor_fields=[predictor_field])
     
    sample:  1 / 1
    @@ -771,7 +771,7 @@ 

2. Run Detectron2

After that, the dataset looks slightly different. Note that an extra field, detectron-predictions, has appeared in the dataset:

    -
    print(dataset)
    +
    print(dataset)
     
    Name:        oiv6-mpeg-detection-v1-dummy
    @@ -792,10 +792,10 @@ 

    2. Run Detectron2
    sample=dataset.first()
    +
    sample=dataset.first()
     
    -
    print(sample)
    +
    print(sample)
     
    <Sample: {
    @@ -1438,15 +1438,15 @@ 

    2. Run Detectron2
    results = dataset.evaluate_detections(
    -    predictor_field,
    -    gt_field="detections",
    -    method="open-images",
    -    pos_label_field="positive_labels",
    -    neg_label_field="negative_labels",
    -    expand_pred_hierarchy=False,
    -    expand_gt_hierarchy=False
    -)
    +
    results = dataset.evaluate_detections(
    +    predictor_field,
    +    gt_field="detections",
    +    method="open-images",
    +    pos_label_field="positive_labels",
    +    neg_label_field="negative_labels",
    +    expand_pred_hierarchy=False,
    +    expand_gt_hierarchy=False
    +)
     
    Evaluating detections...
    @@ -1455,22 +1455,22 @@ 

    2. Run Detectron2
    dataset.delete_sample_fields(predictor_field)
    +
    dataset.delete_sample_fields(predictor_field)
     

    OpenImageV6 evaluation protocol mAP:

    -
    results.mAP()
    +
    results.mAP()
     
    1.0
     

    Per class mAP:

    -
    classes = dataset.distinct(
    -    "detections.detections.label"
    -)
    -for class_ in classes:
    -    print(class_, results.mAP([class_]))
    +
    classes = dataset.distinct(
    +    "detections.detections.label"
    +)
    +for class_ in classes:
    +    print(class_, results.mAP([class_]))
     
    airplane 1.0
    diff --git a/tutorials/detectron2_nb.html b/tutorials/detectron2_nb.html
    index d764a0e0..a47e7f20 100644
    --- a/tutorials/detectron2_nb.html
    +++ b/tutorials/detectron2_nb.html
    @@ -349,90 +349,90 @@ 

    Contents

In this chapter we look into the fiftyone/detectron2 interface: how to add detectron2 results to a fiftyone dataset and how to evaluate detectron2 results with fiftyone.

    -
    # common libs
    -import math, os, io, json, cv2, random, logging, datetime
    -import numpy as np
    -# torch
    -import torch
    -from torchvision import transforms
    -# images
    -from PIL import Image
    -import matplotlib.pyplot as plt
    +
    # common libs
    +import math, os, io, json, cv2, random, logging, datetime
    +import numpy as np
    +# torch
    +import torch
    +from torchvision import transforms
    +# images
    +from PIL import Image
    +import matplotlib.pyplot as plt
     
    -
    # define a helper function
    -def cv2_imshow(img):
    -    img2 = img[:,:,::-1]
    -    plt.figure(figsize=(12, 9))
    -    plt.axis('off')
    -    plt.imshow(img2)
    -    plt.show()
    +
    # define a helper function
    +def cv2_imshow(img):
    +    img2 = img[:,:,::-1]
    +    plt.figure(figsize=(12, 9))
    +    plt.axis('off')
    +    plt.imshow(img2)
    +    plt.show()
     
    -
    ## *** Detectron imports ***
    -import detectron2
    -from detectron2.utils.logger import setup_logger
    -setup_logger()
    +
    ## *** Detectron imports ***
    +import detectron2
    +from detectron2.utils.logger import setup_logger
    +setup_logger()
     
    -# import some common detectron2 utilities
    -from detectron2 import model_zoo
    -from detectron2.engine import DefaultPredictor
    -from detectron2.config import get_cfg
    -from detectron2.utils.visualizer import Visualizer
    -from detectron2.data import MetadataCatalog, DatasetCatalog
    +# import some common detectron2 utilities
    +from detectron2 import model_zoo
    +from detectron2.engine import DefaultPredictor
    +from detectron2.config import get_cfg
    +from detectron2.utils.visualizer import Visualizer
    +from detectron2.data import MetadataCatalog, DatasetCatalog
     
    -
    # CompressAI-Vision
    -from compressai_vision.conversion import FO2DetectronDataset # convert fiftyone dataset to Detectron2 dataset
    -from compressai_vision.conversion import detectron251 # convert Detectron2 results to fiftyone format
    -from compressai_vision.evaluation.fo import annexPredictions # crunch a complete fiftyone dataset through Detectron2 predictor and add the predictions to the fiftyone dataset
    +
    # CompressAI-Vision
    +from compressai_vision.conversion import FO2DetectronDataset # convert fiftyone dataset to Detectron2 dataset
    +from compressai_vision.conversion import detectron251 # convert Detectron2 results to fiftyone format
    +from compressai_vision.evaluation.fo import annexPredictions # crunch a complete fiftyone dataset through Detectron2 predictor and add the predictions to the fiftyone dataset
     
    -
    # fiftyone
    -import fiftyone as fo
    -import fiftyone.zoo as foz
    +
    # fiftyone
    +import fiftyone as fo
    +import fiftyone.zoo as foz
     
    -
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    -print(device)
    +
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    +print(device)
     
    cpu
     
    -
    print("torch:", torch.__version__, "/ cuda:", torch.version.cuda, "/ detectron2:", detectron2.__version__)
    +
    print("torch:", torch.__version__, "/ cuda:", torch.version.cuda, "/ detectron2:", detectron2.__version__)
     
    torch: 1.9.1+cu102 / cuda: 10.2 / detectron2: 0.6
     

Let’s pick the correct Detectron2 model

    -
    ## MODEL A
    -model_name="COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml"
    -## look here:
    -## https://github.com/facebookresearch/detectron2/blob/main/MODEL_ZOO.md#faster-r-cnn
    +
    ## MODEL A
    +model_name="COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml"
    +## look here:
    +## https://github.com/facebookresearch/detectron2/blob/main/MODEL_ZOO.md#faster-r-cnn
     
    -## MODEL B
    -# model_name="COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
    +## MODEL B
    +# model_name="COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
     
    -
    # cfg encapsulates the model architecture & weights, also threshold parameter, metadata, etc.
    -cfg = get_cfg()
    -cfg.MODEL.DEVICE=device
    -# load config from a file:
    -cfg.merge_from_file(model_zoo.get_config_file(model_name))
    -# DO NOT TOUCH THRESHOLD WHEN DOING EVALUATION:
    -# too big a threshold will cut the smallest values & affect the precision(recall) curves & evaluation results
    -# the default value is 0.05
    -# value of 0.01 saturates the results (they don't change at lower values)
    -# cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
    -# get weights
    -cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_name)
    -print("expected input colorspace:", cfg.INPUT.FORMAT)
    -print("loaded datasets:", cfg.DATASETS)
    -model_dataset=cfg.DATASETS.TRAIN[0]
    -print("model was trained with", model_dataset)
    -model_meta=MetadataCatalog.get(model_dataset)
    +
    # cfg encapsulates the model architecture & weights, also threshold parameter, metadata, etc.
    +cfg = get_cfg()
    +cfg.MODEL.DEVICE=device
    +# load config from a file:
    +cfg.merge_from_file(model_zoo.get_config_file(model_name))
    +# DO NOT TOUCH THRESHOLD WHEN DOING EVALUATION:
    +# too big a threshold will cut the smallest values & affect the precision(recall) curves & evaluation results
    +# the default value is 0.05
    +# value of 0.01 saturates the results (they don't change at lower values)
    +# cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
    +# get weights
    +cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_name)
    +print("expected input colorspace:", cfg.INPUT.FORMAT)
    +print("loaded datasets:", cfg.DATASETS)
    +model_dataset=cfg.DATASETS.TRAIN[0]
    +print("model was trained with", model_dataset)
    +model_meta=MetadataCatalog.get(model_dataset)
     
    expected input colorspace: BGR
    @@ -445,15 +445,15 @@ 

    Contents

    model was trained with coco_2017_train
    -
    predictor = DefaultPredictor(cfg)
    +
    predictor = DefaultPredictor(cfg)
     

Get a handle to a dataset. We will be using the oiv6-mpeg-detection-v1 dataset. Please go through the CLI Tutorials in order to produce this dataset.

    -
    dataset = fo.load_dataset("oiv6-mpeg-detection-v1")
    +
    dataset = fo.load_dataset("oiv6-mpeg-detection-v1")
     
    -
    dataset
    +
    dataset
     
    Name:        oiv6-mpeg-detection-v1
    @@ -473,25 +473,25 @@ 

    Contents

    We can go from fiftyone dataset to Detectron2 dataset:

    -
    detectron_dataset=FO2DetectronDataset(fo_dataset=dataset, model_catids=model_meta.thing_classes)
    +
    detectron_dataset=FO2DetectronDataset(fo_dataset=dataset, model_catids=model_meta.thing_classes)
     

    Pick a sample:

    -
    d=detectron_dataset[3]
    +
    d=detectron_dataset[3]
     

    We can visualize that sample also with Detectron2 library tools (although we’d prefer fiftyone with fo.launch_app(dataset)):

    -
    # visualize with Detectron2 tools only
    -img = cv2.imread(d["file_name"])
    -visualizer = Visualizer(img[:, :, ::-1], metadata=model_meta, scale=0.5)
    -out = visualizer.draw_dataset_dict(d)
    -cv2_imshow(out.get_image()[:, :, ::-1])
    +
    # visualize with Detectron2 tools only
    +img = cv2.imread(d["file_name"])
    +visualizer = Visualizer(img[:, :, ::-1], metadata=model_meta, scale=0.5)
    +out = visualizer.draw_dataset_dict(d)
    +cv2_imshow(out.get_image()[:, :, ::-1])
     
    ../_images/detectron2_nb_20_0.png

    Let’s try the Detectron2 predictor:

    -
    res=predictor(img)
    +
    res=predictor(img)
     
    /home/sampsa/silo/interdigital/venv_all/lib/python3.8/site-packages/torch/_tensor.py:575: UserWarning: floor_divide is deprecated, and will be removed in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values.
    @@ -500,10 +500,10 @@ 

    Contents

    We can convert from Detectron2 format to fiftyone detection objects:

    -
    dets=detectron251(res, model_catids=model_meta.thing_classes) # process involves going from class indexes (ints) to class labels (strings)
    +
    dets=detectron251(res, model_catids=model_meta.thing_classes) # process involves going from class indexes (ints) to class labels (strings)
     
    -
    dets
    +
    dets
     
    <Detections: {
    @@ -757,16 +757,16 @@ 

    Contents

    We use the dummy single-sample dataset oiv6-mpeg-detection-v1-dummy created in the CLI tutorials with the compressai-vision import-custom command

    -
    dataset = fo.load_dataset("oiv6-mpeg-detection-v1-dummy")
    +
    dataset = fo.load_dataset("oiv6-mpeg-detection-v1-dummy")
     

Detectron prediction results are saved into the fiftyone (mongodb) database during the run. Let’s define a unique name for the sample field where the detectron results will be saved:

    -
    predictor_field='detectron-predictions'
    +
    predictor_field='detectron-predictions'
     
    -
    annexPredictions(predictors=[predictor], fo_dataset=dataset, predictor_fields=[predictor_field])
    +
    annexPredictions(predictors=[predictor], fo_dataset=dataset, predictor_fields=[predictor_field])
     
    sample:  1 / 1
    @@ -774,7 +774,7 @@ 

    Contents

After that, the dataset looks slightly different. Note that an extra field, detectron-predictions, has appeared in the dataset:

    -
    print(dataset)
    +
    print(dataset)
     
    Name:        oiv6-mpeg-detection-v1-dummy
    @@ -795,10 +795,10 @@ 

    Contents

    Let’s peek at the first sample:

    -
    sample=dataset.first()
    +
    sample=dataset.first()
     
    -
    print(sample)
    +
    print(sample)
     
    <Sample: {
    @@ -1441,15 +1441,15 @@ 

    Contents

    “detectron-predictions” (predicted values). Now we can run the OpenImageV6 evaluation protocol on the dataset which uses the ground truth and the predictor results:

    -
    results = dataset.evaluate_detections(
    -    predictor_field,
    -    gt_field="detections",
    -    method="open-images",
    -    pos_label_field="positive_labels",
    -    neg_label_field="negative_labels",
    -    expand_pred_hierarchy=False,
    -    expand_gt_hierarchy=False
    -)
    +
    results = dataset.evaluate_detections(
    +    predictor_field,
    +    gt_field="detections",
    +    method="open-images",
    +    pos_label_field="positive_labels",
    +    neg_label_field="negative_labels",
    +    expand_pred_hierarchy=False,
    +    expand_gt_hierarchy=False
    +)
     
    Evaluating detections...
    @@ -1458,22 +1458,22 @@ 

    Contents

After the evaluation we should remove the detectron results from the database:

    -
    dataset.delete_sample_fields(predictor_field)
    +
    dataset.delete_sample_fields(predictor_field)
     

    OpenImageV6 evaluation protocol mAP:

    -
    results.mAP()
    +
    results.mAP()
     
    1.0
     

    Per class mAP:

    -
    classes = dataset.distinct(
    -    "detections.detections.label"
    -)
    -for class_ in classes:
    -    print(class_, results.mAP([class_]))
    +
    classes = dataset.distinct(
    +    "detections.detections.label"
    +)
    +for class_ in classes:
    +    print(class_, results.mAP([class_]))
     
    airplane 1.0
    diff --git a/tutorials/download.html b/tutorials/download.html
    index 69cff618..a52d642b 100644
    --- a/tutorials/download.html
    +++ b/tutorials/download.html
    @@ -344,31 +344,31 @@ 

    1. Download Images
    # common libs
    -import math, os, io, json, cv2, random, logging
    -import numpy as np
    -# images
    -from PIL import Image
    -import matplotlib.pyplot as plt
    +
    # common libs
    +import math, os, io, json, cv2, random, logging
    +import numpy as np
    +# images
    +from PIL import Image
    +import matplotlib.pyplot as plt
     
    -
    # fiftyone
    -import fiftyone as fo
    -import fiftyone.zoo as foz
    +
    # fiftyone
    +import fiftyone as fo
    +import fiftyone.zoo as foz
     
    -
    # CompressAI-Vision
    -from compressai_vision.conversion import imageIdFileList
    +
    # CompressAI-Vision
    +from compressai_vision.conversion import imageIdFileList
     
    -
    homie=os.path.expanduser("~")
    -print("your home path is", homie)
    -fodir=os.path.join(homie,'fiftyone')
    -print("fiftyone dowloads data by default to", fodir)
    -try:
    -    os.mkdir(fodir)
    -except FileExistsError:
    -    pass
    +
    homie=os.path.expanduser("~")
    +print("your home path is", homie)
    +fodir=os.path.join(homie,'fiftyone')
    +print("fiftyone dowloads data by default to", fodir)
    +try:
    +    os.mkdir(fodir)
    +except FileExistsError:
    +    pass
     
    your home path is /home/sampsa
    @@ -376,7 +376,7 @@ 

    1. Download Images
    fo.list_datasets()
    +
    fo.list_datasets()
     
    ['detectron-run-sampsa-oiv6-mpeg-detection-v1-2022-11-16-17-22-40-319395',
    @@ -398,10 +398,10 @@ 

1. Download Images

Let’s use two files: detection_validation_input_5k.lst and segmentation_validation_input_5k.lst

    -
    path_to_list_file="/home/sampsa/silo/interdigital/CompressAI-Vision/compressai_vision/data/mpeg_vcm_data"
    +
    path_to_list_file="/home/sampsa/silo/interdigital/CompressAI-Vision/compressai_vision/data/mpeg_vcm_data"
     
    -
    !head -n10 {path_to_list_file}/detection_validation_input_5k.lst
    +
    !head -n10 {path_to_list_file}/detection_validation_input_5k.lst
     
    bef50424c62d12c5.jpg
    @@ -416,25 +416,25 @@ 

    1. Download Images
    det_lst=os.path.join(path_to_mpeg_vcm_files,"detection_validation_input_5k.lst")
    -seg_lst=os.path.join(path_to_mpeg_vcm_files, "segmentation_validation_input_5k.lst")
    -assert(os.path.exists(det_lst)), "missing file "+det_lst
    -assert(os.path.exists(seg_lst)), "missing file "+seg_lst
    -lis=imageIdFileList(det_lst, seg_lst)
    -print(len(lis))
    +
    det_lst=os.path.join(path_to_mpeg_vcm_files,"detection_validation_input_5k.lst")
    +seg_lst=os.path.join(path_to_mpeg_vcm_files, "segmentation_validation_input_5k.lst")
    +assert(os.path.exists(det_lst)), "missing file "+det_lst
    +assert(os.path.exists(seg_lst)), "missing file "+seg_lst
    +lis=imageIdFileList(det_lst, seg_lst)
    +print(len(lis))
     
    8189
     

    Tell fiftyone to load the correct subset of OpenImageV6 dataset:

    -
    # https://voxel51.com/docs/fiftyone/user_guide/dataset_zoo/datasets.html#dataset-zoo-open-images-v6
    -dataset = foz.load_zoo_dataset(
    -    "open-images-v6",
    -    split="validation",
    -    # label_types=("detections", "classifications", "relationships", "segmentations") # this is the default
    -    image_ids=lis
    -)
    +
    # https://voxel51.com/docs/fiftyone/user_guide/dataset_zoo/datasets.html#dataset-zoo-open-images-v6
    +dataset = foz.load_zoo_dataset(
    +    "open-images-v6",
    +    split="validation",
    +    # label_types=("detections", "classifications", "relationships", "segmentations") # this is the default
    +    image_ids=lis
    +)
     
    Downloading split 'validation' to '/home/sampsa/fiftyone/open-images-v6/validation' if necessary
    @@ -443,8 +443,8 @@ 

    1. Download Images
    # take a look at the dataset
    -dataset
    +
    # take a look at the dataset
    +dataset
     
    -
    # make dataset persistent .. next time you import fiftyone it's still available (loaded into the mongodb that's running in the background)
    -dataset.persistent=True
    +
    # make dataset persistent .. next time you import fiftyone it's still available (loaded into the mongodb that's running in the background)
    +dataset.persistent=True
     
    -
    # next time you need it, load it with:
    -dataset = fo.load_dataset("open-images-v6-validation")
    +
    # next time you need it, load it with:
    +dataset = fo.load_dataset("open-images-v6-validation")
     
    -
    # peek at first sample
    -dataset.first()
    +
    # peek at first sample
    +dataset.first()
     
    <Sample: {
    @@ -527,9 +527,9 @@ 

    1. Download Images
    dir_=os.path.join(fodir,"open-images-v6")
    -print("contents of", dir_,":")
    -!tree --filelimit=10 $dir_ | cat
    +
    dir_=os.path.join(fodir,"open-images-v6")
    +print("contents of", dir_,":")
    +!tree --filelimit=10 $dir_ | cat
     
    contents of /home/sampsa/fiftyone/open-images-v6 :
    diff --git a/tutorials/download_nb.html b/tutorials/download_nb.html
    index a1b5eb1b..90334a73 100644
    --- a/tutorials/download_nb.html
    +++ b/tutorials/download_nb.html
    @@ -348,31 +348,31 @@ 

    Contents

    In this chapter we use fiftyone to download, inspect and visualize a subset of OpenImageV6 images

    -
    # common libs
    -import math, os, io, json, cv2, random, logging
    -import numpy as np
    -# images
    -from PIL import Image
    -import matplotlib.pyplot as plt
    +
    # common libs
    +import math, os, io, json, cv2, random, logging
    +import numpy as np
    +# images
    +from PIL import Image
    +import matplotlib.pyplot as plt
     
    -
    # fiftyone
    -import fiftyone as fo
    -import fiftyone.zoo as foz
    +
    # fiftyone
    +import fiftyone as fo
    +import fiftyone.zoo as foz
     
    -
    # CompressAI-Vision
    -from compressai_vision.conversion import imageIdFileList
    +
    # CompressAI-Vision
    +from compressai_vision.conversion import imageIdFileList
     
    -
    homie=os.path.expanduser("~")
    -print("your home path is", homie)
    -fodir=os.path.join(homie,'fiftyone')
    -print("fiftyone dowloads data by default to", fodir)
    -try:
    -    os.mkdir(fodir)
    -except FileExistsError:
    -    pass
    +
    homie=os.path.expanduser("~")
    +print("your home path is", homie)
    +fodir=os.path.join(homie,'fiftyone')
    +print("fiftyone dowloads data by default to", fodir)
    +try:
    +    os.mkdir(fodir)
    +except FileExistsError:
    +    pass
     
    your home path is /home/sampsa
    @@ -380,7 +380,7 @@ 

    Contents

    List all datasets (already) registered to fiftyone

    -
    fo.list_datasets()
    +
    fo.list_datasets()
     
    ['detectron-run-sampsa-oiv6-mpeg-detection-v1-2022-11-16-17-22-40-319395',
    @@ -402,10 +402,10 @@ 

    Contents

    OpenImageV6.

    Let’s use two files: detection_validation_input_5k.lst and segmentation_validation_input_5k.lst

    -
    path_to_list_file="/home/sampsa/silo/interdigital/CompressAI-Vision/compressai_vision/data/mpeg_vcm_data"
    +
    path_to_list_file="/home/sampsa/silo/interdigital/CompressAI-Vision/compressai_vision/data/mpeg_vcm_data"
     
    -
    !head -n10 {path_to_list_file}/detection_validation_input_5k.lst
    +
    !head -n10 {path_to_list_file}/detection_validation_input_5k.lst
     
    bef50424c62d12c5.jpg
    @@ -420,25 +420,25 @@ 

    Contents

    2e96665b867c4d0f.jpg
    -
    det_lst=os.path.join(path_to_mpeg_vcm_files,"detection_validation_input_5k.lst")
    -seg_lst=os.path.join(path_to_mpeg_vcm_files, "segmentation_validation_input_5k.lst")
    -assert(os.path.exists(det_lst)), "missing file "+det_lst
    -assert(os.path.exists(seg_lst)), "missing file "+seg_lst
    -lis=imageIdFileList(det_lst, seg_lst)
    -print(len(lis))
    +
    det_lst=os.path.join(path_to_mpeg_vcm_files,"detection_validation_input_5k.lst")
    +seg_lst=os.path.join(path_to_mpeg_vcm_files, "segmentation_validation_input_5k.lst")
    +assert(os.path.exists(det_lst)), "missing file "+det_lst
    +assert(os.path.exists(seg_lst)), "missing file "+seg_lst
    +lis=imageIdFileList(det_lst, seg_lst)
    +print(len(lis))
     
    8189
     

    Tell fiftyone to load the correct subset of OpenImageV6 dataset:

    -
    # https://voxel51.com/docs/fiftyone/user_guide/dataset_zoo/datasets.html#dataset-zoo-open-images-v6
    -dataset = foz.load_zoo_dataset(
    -    "open-images-v6",
    -    split="validation",
    -    # label_types=("detections", "classifications", "relationships", "segmentations") # this is the default
    -    image_ids=lis
    -)
    +
    # https://voxel51.com/docs/fiftyone/user_guide/dataset_zoo/datasets.html#dataset-zoo-open-images-v6
    +dataset = foz.load_zoo_dataset(
    +    "open-images-v6",
    +    split="validation",
    +    # label_types=("detections", "classifications", "relationships", "segmentations") # this is the default
    +    image_ids=lis
    +)
     
    Downloading split 'validation' to '/home/sampsa/fiftyone/open-images-v6/validation' if necessary
    @@ -447,8 +447,8 @@ 

    Contents

    Loading existing dataset 'open-images-v6-validation'. To reload from disk, either delete the existing dataset or provide a custom `dataset_name` to use
    -
    # take a look at the dataset
    -dataset
    +
    # take a look at the dataset
    +dataset
     
    Name:        open-images-v6-validation
    @@ -469,16 +469,16 @@ 

    Contents

    segmentations: fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)
    -
    # make dataset persistent .. next time you import fiftyone it's still available (loaded into the mongodb that's running in the background)
    -dataset.persistent=True
    +
    # make dataset persistent .. next time you import fiftyone it's still available (loaded into the mongodb that's running in the background)
    +dataset.persistent=True
     
    -
    # next time you need it, load it with:
    -dataset = fo.load_dataset("open-images-v6-validation")
    +
    # next time you need it, load it with:
    +dataset = fo.load_dataset("open-images-v6-validation")
     
    -
    # peek at first sample
    -dataset.first()
    +
    # peek at first sample
    +dataset.first()
     
    <Sample: {
    @@ -531,9 +531,9 @@ 

    Contents

Let’s take a look at where fiftyone downloaded the files:

    -
    dir_=os.path.join(fodir,"open-images-v6")
    -print("contents of", dir_,":")
    -!tree --filelimit=10 $dir_ | cat
    +
    dir_=os.path.join(fodir,"open-images-v6")
    +print("contents of", dir_,":")
    +!tree --filelimit=10 $dir_ | cat
     
    contents of /home/sampsa/fiftyone/open-images-v6 :
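If you would like to visually browse the downloaded subset, one optional way (not needed for the remaining tutorials) is to launch the fiftyone app on the dataset we just registered. A minimal sketch using the standard fiftyone API:

# optional: inspect the downloaded subset interactively in the fiftyone app
import fiftyone as fo

dataset = fo.load_dataset("open-images-v6-validation")
session = fo.launch_app(dataset)  # opens the web UI
# session.wait()                  # uncomment when running as a plain script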
    diff --git a/tutorials/encdec.html b/tutorials/encdec.html
    index a2926709..93133501 100644
    --- a/tutorials/encdec.html
    +++ b/tutorials/encdec.html
    @@ -348,46 +348,46 @@ 

    4. Creating an EncoderDecoder class

    write your own EncoderDecoder class and this is quite simple.

    Here we demo a simple EncoderDecoder class that encodes & decodes using jpeg.

    -
    import logging, io, cv2
    -import numpy as np
    -import matplotlib.pyplot as plt
    -from PIL import Image
    -from compressai_vision.evaluation.pipeline import EncoderDecoder
    +
    import logging, io, cv2
    +import numpy as np
    +import matplotlib.pyplot as plt
    +from PIL import Image
    +from compressai_vision.evaluation.pipeline import EncoderDecoder
     

    In the constructor, instantiate a logger and save the provided quality parameter.

    -
    class JpegEncoderDecoder(EncoderDecoder):
    +
    class JpegEncoderDecoder(EncoderDecoder):
     
    -    def __init__(self, qp=10):
    -        self.logger = logging.getLogger(self.__class__.__name__)
    -        self.qp=qp
    -        self.reset() # not used in this class
    +    def __init__(self, qp=10):
    +        self.logger = logging.getLogger(self.__class__.__name__)
    +        self.qp=qp
    +        self.reset() # not used in this class
     

    Define how the image is encoded + decoded and how the bitrate is calculated. We are using BGR since it is the default input format for Detectron2 predictors.

    -
    def BGR(self, bgr_image, tag=None):
    -    # bgr_image: numpy BGR24 image: (y,x,3)
    -    # tag could be used to identify images if we want to cache them
    -    # BGR -> RGB (as PIL works with RGB)
    -    rgb_image = bgr_image[:,:,::-1]
    -    pil_img=Image.fromarray(rgb_image).convert("RGB")
    -    tmp = io.BytesIO()
    -    # encode image
    -    pil_img.save(tmp, format="jpeg", quality=self.qp)
    -    tmp.seek(0)
    -    # calculate bits-per-pixel
    -    filesize = tmp.getbuffer().nbytes
    -    bpp = filesize * float(8) / (pil_img.size[0] * pil_img.size[1])
    -    # decode image back
    -    pil_img2 = Image.open(tmp).convert("RGB")
    -    # back to BGR
    -    rgb_image=np.array(pil_img2)
    -    bgr_image=rgb_image[:,:,::-1]
    -    # transformed image, bits-per-pixel ready
    -    return bgr_image, bpp
    +
    def BGR(self, bgr_image, tag=None):
    +    # bgr_image: numpy BGR24 image: (y,x,3)
    +    # tag could be used to identify images if we want to cache them
    +    # BGR -> RGB (as PIL works with RGB)
    +    rgb_image = bgr_image[:,:,::-1]
    +    pil_img=Image.fromarray(rgb_image).convert("RGB")
    +    tmp = io.BytesIO()
    +    # encode image
    +    pil_img.save(tmp, format="jpeg", quality=self.qp)
    +    tmp.seek(0)
    +    # calculate bits-per-pixel
    +    filesize = tmp.getbuffer().nbytes
    +    bpp = filesize * float(8) / (pil_img.size[0] * pil_img.size[1])
    +    # decode image back
    +    pil_img2 = Image.open(tmp).convert("RGB")
    +    # back to BGR
    +    rgb_image=np.array(pil_img2)
    +    bgr_image=rgb_image[:,:,::-1]
    +    # transformed image, bits-per-pixel ready
    +    return bgr_image, bpp
     

So, we have a compact class that defines, in a single method, all
@@ -396,20 +396,20 @@

    4. Creating an EncoderDecoder class

    with all the rest of the infrastructure provided by CompressAI-Vision library.

    Next, let’s see JpegEncoderDecoder in action.

    -
    bgr_image=cv2.imread("dog_512.png")
    +
    bgr_image=cv2.imread("dog_512.png")
     
    -
    encdec=JpegEncoderDecoder(qp=1)
    +
    encdec=JpegEncoderDecoder(qp=1)
     
    -
    transformed_bgr_image, bpp = encdec.BGR(bgr_image)
    +
    transformed_bgr_image, bpp = encdec.BGR(bgr_image)
     

Print the bits-per-pixel value and compare the original and transformed images:

    -
    print("BPP=", bpp)
    -plt.figure(figsize=(20,20))
    -plt.subplot(1,2,1); plt.imshow(bgr_image[:,:,::-1]); _=plt.axis('off')
    -plt.subplot(1,2,2); plt.imshow(transformed_bgr_image[:,:,::-1]); _=plt.axis('off')
    +
    print("BPP=", bpp)
    +plt.figure(figsize=(20,20))
    +plt.subplot(1,2,1); plt.imshow(bgr_image[:,:,::-1]); _=plt.axis('off')
    +plt.subplot(1,2,2); plt.imshow(transformed_bgr_image[:,:,::-1]); _=plt.axis('off')
     
    BPP= 0.16878255208333334
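As a sanity check on the numbers: roughly 0.169 bpp on a 512x512 image corresponds to about 0.169 * 512 * 512 / 8 ≈ 5.5 kB of JPEG payload. Below is a small optional sketch (the quality values are arbitrary examples) that sweeps the quality parameter of the JpegEncoderDecoder defined above and plots the resulting bits-per-pixel:

# sweep the JPEG quality parameter and record the resulting bits-per-pixel
import cv2
import matplotlib.pyplot as plt

bgr_image = cv2.imread("dog_512.png")
qps = [1, 10, 30, 50, 70, 90]
bpps = []
for qp in qps:
    encdec = JpegEncoderDecoder(qp=qp)
    _, bpp = encdec.BGR(bgr_image)
    bpps.append(bpp)
plt.plot(qps, bpps, marker="o")
plt.xlabel("JPEG quality parameter")
plt.ylabel("bits per pixel")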
    diff --git a/tutorials/encdec_nb.html b/tutorials/encdec_nb.html
    index ec2395bb..e86f9c47 100644
    --- a/tutorials/encdec_nb.html
    +++ b/tutorials/encdec_nb.html
    @@ -354,46 +354,46 @@ 

    Contents

    write your own EncoderDecoder class and this is quite simple.

    Here we demo a simple EncoderDecoder class that encodes & decodes using jpeg.

    -
    import logging, io, cv2
    -import numpy as np
    -import matplotlib.pyplot as plt
    -from PIL import Image
    -from compressai_vision.evaluation.pipeline import EncoderDecoder
    +
    import logging, io, cv2
    +import numpy as np
    +import matplotlib.pyplot as plt
    +from PIL import Image
    +from compressai_vision.evaluation.pipeline import EncoderDecoder
     

    In the constructor, instantiate a logger and save the provided quality parameter.

    -
    class JpegEncoderDecoder(EncoderDecoder):
    +
    class JpegEncoderDecoder(EncoderDecoder):
     
    -    def __init__(self, qp=10):
    -        self.logger = logging.getLogger(self.__class__.__name__)
    -        self.qp=qp
    -        self.reset() # not used in this class
    +    def __init__(self, qp=10):
    +        self.logger = logging.getLogger(self.__class__.__name__)
    +        self.qp=qp
    +        self.reset() # not used in this class
     

    Define how the image is encoded + decoded and how the bitrate is calculated. We are using BGR since it is the default input format for Detectron2 predictors.

    -
    def BGR(self, bgr_image, tag=None):
    -    # bgr_image: numpy BGR24 image: (y,x,3)
    -    # tag could be used to identify images if we want to cache them
    -    # BGR -> RGB (as PIL works with RGB)
    -    rgb_image = bgr_image[:,:,::-1]
    -    pil_img=Image.fromarray(rgb_image).convert("RGB")
    -    tmp = io.BytesIO()
    -    # encode image
    -    pil_img.save(tmp, format="jpeg", quality=self.qp)
    -    tmp.seek(0)
    -    # calculate bits-per-pixel
    -    filesize = tmp.getbuffer().nbytes
    -    bpp = filesize * float(8) / (pil_img.size[0] * pil_img.size[1])
    -    # decode image back
    -    pil_img2 = Image.open(tmp).convert("RGB")
    -    # back to BGR
    -    rgb_image=np.array(pil_img2)
    -    bgr_image=rgb_image[:,:,::-1]
    -    # transformed image, bits-per-pixel ready
    -    return bgr_image, bpp
    +
    def BGR(self, bgr_image, tag=None):
    +    # bgr_image: numpy BGR24 image: (y,x,3)
    +    # tag could be used to identify images if we want to cache them
    +    # BGR -> RGB (as PIL works with RGB)
    +    rgb_image = bgr_image[:,:,::-1]
    +    pil_img=Image.fromarray(rgb_image).convert("RGB")
    +    tmp = io.BytesIO()
    +    # encode image
    +    pil_img.save(tmp, format="jpeg", quality=self.qp)
    +    tmp.seek(0)
    +    # calculate bits-per-pixel
    +    filesize = tmp.getbuffer().nbytes
    +    bpp = filesize * float(8) / (pil_img.size[0] * pil_img.size[1])
    +    # decode image back
    +    pil_img2 = Image.open(tmp).convert("RGB")
    +    # back to BGR
    +    rgb_image=np.array(pil_img2)
    +    bgr_image=rgb_image[:,:,::-1]
    +    # transformed image, bits-per-pixel ready
    +    return bgr_image, bpp
     

So, we have a compact class that defines, in a single method, all
@@ -402,20 +402,20 @@

    Contents

    with all the rest of the infrastructure provided by CompressAI-Vision library.

    Next, let’s see JpegEncoderDecoder in action.

    -
    bgr_image=cv2.imread("dog_512.png")
    +
    bgr_image=cv2.imread("dog_512.png")
     
    -
    encdec=JpegEncoderDecoder(qp=1)
    +
    encdec=JpegEncoderDecoder(qp=1)
     
    -
    transformed_bgr_image, bpp = encdec.BGR(bgr_image)
    +
    transformed_bgr_image, bpp = encdec.BGR(bgr_image)
     

Print the bits-per-pixel value and compare the original and transformed images:

    -
    print("BPP=", bpp)
    -plt.figure(figsize=(20,20))
    -plt.subplot(1,2,1); plt.imshow(bgr_image[:,:,::-1]); _=plt.axis('off')
    -plt.subplot(1,2,2); plt.imshow(transformed_bgr_image[:,:,::-1]); _=plt.axis('off')
    +
    print("BPP=", bpp)
    +plt.figure(figsize=(20,20))
    +plt.subplot(1,2,1); plt.imshow(bgr_image[:,:,::-1]); _=plt.axis('off')
    +plt.subplot(1,2,2); plt.imshow(transformed_bgr_image[:,:,::-1]); _=plt.axis('off')
     
    BPP= 0.16878255208333334
    diff --git a/tutorials/evaluate.html b/tutorials/evaluate.html
    index 25645641..af2d40a4 100644
    --- a/tutorials/evaluate.html
    +++ b/tutorials/evaluate.html
    @@ -346,78 +346,78 @@ 

    3. Evaluate

    In this tutorial we evaluate mAP values for a dataset with Detectron2 and a deep-learning encoding model from the CompressAI library. We also show how to perform a baseline evaluation with VTM.

    -
    # common libs
    -import math, os, io, json, cv2, random, logging, pickle, datetime
    -import numpy as np
    -# torch
    -import torch
    -# images
    -from PIL import Image
    -import matplotlib.pyplot as plt
    -# compressai
    -from compressai.zoo import bmshj2018_factorized
    +
    # common libs
    +import math, os, io, json, cv2, random, logging, pickle, datetime
    +import numpy as np
    +# torch
    +import torch
    +# images
    +from PIL import Image
    +import matplotlib.pyplot as plt
    +# compressai
    +from compressai.zoo import bmshj2018_factorized
     
    -
    ## *** Detectron imports ***
    -import detectron2
    -from detectron2.utils.logger import setup_logger
    -setup_logger()
    +
    ## *** Detectron imports ***
    +import detectron2
    +from detectron2.utils.logger import setup_logger
    +setup_logger()
     
    -# import some common detectron2 utilities
    -from detectron2 import model_zoo
    -from detectron2.engine import DefaultPredictor
    -from detectron2.config import get_cfg
    -from detectron2.utils.visualizer import Visualizer
    -from detectron2.data import MetadataCatalog, DatasetCatalog
    +# import some common detectron2 utilities
    +from detectron2 import model_zoo
    +from detectron2.engine import DefaultPredictor
    +from detectron2.config import get_cfg
    +from detectron2.utils.visualizer import Visualizer
    +from detectron2.data import MetadataCatalog, DatasetCatalog
     
    -
    # CompressAI-Vision
    -from compressai_vision.conversion import FO2DetectronDataset # convert fiftyone dataset to Detectron2 dataset
    -from compressai_vision.conversion import detectron251 # convert Detectron2 results to fiftyone format
-from compressai_vision.evaluation.fo import annexPredictions # annex predictions from a predictor to the fiftyone dataset
    -from compressai_vision.evaluation.pipeline import CompressAIEncoderDecoder, VTMEncoderDecoder # a class that does encoding+decoding & returns the transformed image & bitrate
    -from compressai_vision.pipelines.remote_analysis.tools import confLogger, quickLog, getDataFile
    +
    # CompressAI-Vision
    +from compressai_vision.conversion import FO2DetectronDataset # convert fiftyone dataset to Detectron2 dataset
    +from compressai_vision.conversion import detectron251 # convert Detectron2 results to fiftyone format
+from compressai_vision.evaluation.fo import annexPredictions # annex predictions from a predictor to the fiftyone dataset
    +from compressai_vision.evaluation.pipeline import CompressAIEncoderDecoder, VTMEncoderDecoder # a class that does encoding+decoding & returns the transformed image & bitrate
    +from compressai_vision.pipelines.remote_analysis.tools import confLogger, quickLog, getDataFile
     
    -
    # fiftyone
    -import fiftyone as fo
    -import fiftyone.zoo as foz
    +
    # fiftyone
    +import fiftyone as fo
    +import fiftyone.zoo as foz
     
    -
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    -print(device)
    +
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    +print(device)
     
    cpu
     
    -
    ## MODEL A
    -model_name="COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml"
    -## look here:
    -## https://github.com/facebookresearch/detectron2/blob/main/MODEL_ZOO.md#faster-r-cnn
    -## for the line that says X101-FPN --> box AP is 43
    +
    ## MODEL A
    +model_name="COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml"
    +## look here:
    +## https://github.com/facebookresearch/detectron2/blob/main/MODEL_ZOO.md#faster-r-cnn
    +## for the line that says X101-FPN --> box AP is 43
     
    -## MODEL B
    -# model_name="COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
    +## MODEL B
    +# model_name="COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
     
    -
    # cfg encapsulates the model architecture & weights, also threshold parameter, metadata, etc.
    -cfg = get_cfg()
    -cfg.MODEL.DEVICE=device
    -# load config from a file:
    -cfg.merge_from_file(model_zoo.get_config_file(model_name))
    -# DO NOT TOUCH THRESHOLD WHEN DOING EVALUATION:
    -# too big a threshold will cut the smallest values & affect the precision(recall) curves & evaluation results
    -# the default value is 0.05
    -# value of 0.01 saturates the results (they don't change at lower values)
    -# cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
    -# get weights
    -cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_name)
    -print("expected input colorspace:", cfg.INPUT.FORMAT)
    -print("loaded datasets:", cfg.DATASETS)
    -model_dataset=cfg.DATASETS.TRAIN[0]
    -print("model was trained with", model_dataset)
    -model_meta=MetadataCatalog.get(model_dataset)
    +
    # cfg encapsulates the model architecture & weights, also threshold parameter, metadata, etc.
    +cfg = get_cfg()
    +cfg.MODEL.DEVICE=device
    +# load config from a file:
    +cfg.merge_from_file(model_zoo.get_config_file(model_name))
    +# DO NOT TOUCH THRESHOLD WHEN DOING EVALUATION:
    +# too big a threshold will cut the smallest values & affect the precision(recall) curves & evaluation results
    +# the default value is 0.05
    +# value of 0.01 saturates the results (they don't change at lower values)
    +# cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
    +# get weights
    +cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_name)
    +print("expected input colorspace:", cfg.INPUT.FORMAT)
    +print("loaded datasets:", cfg.DATASETS)
    +model_dataset=cfg.DATASETS.TRAIN[0]
    +print("model was trained with", model_dataset)
    +model_meta=MetadataCatalog.get(model_dataset)
     
    expected input colorspace: BGR
    @@ -430,18 +430,18 @@ 

    3. Evaluate

    model was trained with coco_2017_train
    -
    # model_meta.thing_classes # check class labels this was trained with
    +
    # model_meta.thing_classes # check class labels this was trained with
     
    -
    predictor = DefaultPredictor(cfg)
    +
    predictor = DefaultPredictor(cfg)
     

Get a handle to a dataset. We will be using the oiv6-mpeg-detection-v1 dataset. Please go through the CLI Tutorials in order to produce this dataset.

    -
    dataset = fo.load_dataset("oiv6-mpeg-detection-v1-dummy") # or use the dummy dataset for testing/debugging
    +
    dataset = fo.load_dataset("oiv6-mpeg-detection-v1-dummy") # or use the dummy dataset for testing/debugging
     
    -
    dataset
    +
    dataset
     
    Name:        oiv6-mpeg-detection-v1-dummy
    @@ -461,32 +461,32 @@ 

    3. Evaluate

    Set some loglevels

    -
    # loglev=logging.DEBUG
    -loglev=logging.INFO
    -quickLog("CompressAIEncoderDecoder", loglev)
    +
    # loglev=logging.DEBUG
    +loglev=logging.INFO
    +quickLog("CompressAIEncoderDecoder", loglev)
     
    <Logger CompressAIEncoderDecoder (INFO)>
     

    Get a list of labels in the dataset:

    -
    classes = dataset.distinct(
    -    "detections.detections.label"
    -)
    -print(classes)
    +
    classes = dataset.distinct(
    +    "detections.detections.label"
    +)
    +print(classes)
     
    ['airplane']
     
    -
    def per_class(results_obj):
    -    """helper function: take fiftyone/openimagev6 results object & spit
    -    out mAP breakdown as per class
    -    """
    -    d = {}
    -    for class_ in classes:
    -        d[class_] = results_obj.mAP([class_])
    -    return d
    +
    def per_class(results_obj):
    +    """helper function: take fiftyone/openimagev6 results object & spit
    +    out mAP breakdown as per class
    +    """
    +    d = {}
    +    for class_ in classes:
    +        d[class_] = results_obj.mAP([class_])
    +    return d
     

CompressAIEncoderDecoder is a subclass of EncoderDecoder,
@@ -501,48 +501,48 @@

    3. Evaluate

    image before the image is passed to the Detectron2 predictor.

    We run the bmshj2018_factorized model over various quality parameters:

    -
    params=[1] # debugging
    -# params=[1,2,3,4,5,6,7,8]
    +
    params=[1] # debugging
    +# params=[1,2,3,4,5,6,7,8]
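Before running the full loop below, you can also try a single CompressAIEncoderDecoder on one image, in the same spirit as the JpegEncoderDecoder demo of the previous tutorial. This is an optional sketch: it assumes CompressAIEncoderDecoder exposes the same BGR(image) method as any EncoderDecoder subclass, and "dog_512.png" stands for any test image you have at hand.

# optional: run a single CompressAI model through the EncoderDecoder interface
import cv2
from compressai.zoo import bmshj2018_factorized
from compressai_vision.evaluation.pipeline import CompressAIEncoderDecoder

net = bmshj2018_factorized(quality=1, pretrained=True).eval().to(device)
enc_dec = CompressAIEncoderDecoder(net, device=device)
bgr_image = cv2.imread("dog_512.png")               # any BGR test image
transformed_bgr_image, bpp = enc_dec.BGR(bgr_image)
print("BPP=", bpp)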
     

    Detectron prediction results are saved during the run into the fiftyone (mongodb) database. Let’s define a unique name for the sample field where the detectron results are saved:

    -
    predictor_field='detectron-predictions'
    -
    -
    -
    xs=[]; ys=[]; maps=[]; # bpp, mAP values, mAP(s) per class
    -results=[] # complete results
    -for i in params:
    -    net = bmshj2018_factorized(quality=i, pretrained=True).eval().to(device)
    -    enc_dec = CompressAIEncoderDecoder(net, device=device)
    -    # note the EncoderDecoder instance here:
    -    # before the predictor is used, the image is crunched through the encoding/decoding process & the bitrate is recorded
    -    # you could substitute CompressAIEncoderDecoder with VTMEncoderDecoder if you'd like to (see also the end of this tutorial)
    -    print("running the detector at", i)
    -    bpp = annexPredictions(predictors=[predictor], fo_dataset=dataset, encoder_decoder=enc_dec, predictor_fields=[predictor_field])
    -    # .. now detectron's results are in each sample at the "detectron-predictions"  field
    -    res = dataset.evaluate_detections(
    -        predictor_field,
    -        gt_field="detections",
    -        method="open-images",
    -        pos_label_field="positive_labels",
    -        neg_label_field="negative_labels",
    -        expand_pred_hierarchy=False,
    -        expand_gt_hierarchy=False
    -    )
    -    results.append((i, bpp, res))
    -    # save to disk at each iteration as a backup just in case
    -    xs.append(bpp)
    -    ys.append(res.mAP())
    -    maps.append(per_class(res))
    -    with open("out.json","w") as f:
    -        f.write(json.dumps({
    -            "bpp" : xs,
    -            "map" : ys,
    -            "map_per_class" : maps
    -            }, indent=2))
    -print("ready!")
    +
    predictor_field='detectron-predictions'
    +
    +
    +
    xs=[]; ys=[]; maps=[]; # bpp, mAP values, mAP(s) per class
    +results=[] # complete results
    +for i in params:
    +    net = bmshj2018_factorized(quality=i, pretrained=True).eval().to(device)
    +    enc_dec = CompressAIEncoderDecoder(net, device=device)
    +    # note the EncoderDecoder instance here:
    +    # before the predictor is used, the image is crunched through the encoding/decoding process & the bitrate is recorded
    +    # you could substitute CompressAIEncoderDecoder with VTMEncoderDecoder if you'd like to (see also the end of this tutorial)
    +    print("running the detector at", i)
    +    bpp = annexPredictions(predictors=[predictor], fo_dataset=dataset, encoder_decoder=enc_dec, predictor_fields=[predictor_field])
    +    # .. now detectron's results are in each sample at the "detectron-predictions"  field
    +    res = dataset.evaluate_detections(
    +        predictor_field,
    +        gt_field="detections",
    +        method="open-images",
    +        pos_label_field="positive_labels",
    +        neg_label_field="negative_labels",
    +        expand_pred_hierarchy=False,
    +        expand_gt_hierarchy=False
    +    )
    +    results.append((i, bpp, res))
    +    # save to disk at each iteration as a backup just in case
    +    xs.append(bpp)
    +    ys.append(res.mAP())
    +    maps.append(per_class(res))
    +    with open("out.json","w") as f:
    +        f.write(json.dumps({
    +            "bpp" : xs,
    +            "map" : ys,
    +            "map_per_class" : maps
    +            }, indent=2))
    +print("ready!")
     
    running the detector at 1
    @@ -561,13 +561,13 @@ 

    3. Evaluate

    After the evaluation we can (and should!) remove the detectron results from the database:

    -
    dataset.delete_sample_fields(predictor_field)
    +
    dataset.delete_sample_fields(predictor_field)
     

    Load results

    -
    with open("out.json","r") as f:
    -    res=json.load(f)
    -print(res)
    +
    with open("out.json","r") as f:
    +    res=json.load(f)
    +print(res)
     
    {'bpp': [0.10060123042505593], 'map': [1.0], 'map_per_class': [{'airplane': 1.0}]}
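With the full params list enabled, out.json collects one (bpp, mAP) point per quality level, so a rate-accuracy curve can be drawn directly from it. A minimal sketch (matplotlib was already imported above):

# plot the rate / accuracy points collected in out.json
import json
import matplotlib.pyplot as plt

with open("out.json", "r") as f:
    res = json.load(f)
plt.plot(res["bpp"], res["map"], marker="o")
plt.xlabel("bits per pixel")
plt.ylabel("mAP")
plt.title("bmshj2018_factorized")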
    @@ -577,31 +577,31 @@ 

    3. Evaluate

CompressAIEncoderDecoder with VTMEncoderDecoder in order to produce the anchor/baseline results. Let’s first set some variables for the VTM program:

    -
    # NOTE: set path_to_vtm_software
    -vtm_encoder_app=os.path.join(path_to_vtm_software, "bin/EncoderAppStatic")
    -vtm_decoder_app=os.path.join(path_to_vtm_software, "bin/DecoderAppStatic")
    -vtm_cfg=os.path.join(path_to_vtm_software, "cfg/encoder_intra_vtm.cfg")
    +
    # NOTE: set path_to_vtm_software
    +vtm_encoder_app=os.path.join(path_to_vtm_software, "bin/EncoderAppStatic")
    +vtm_decoder_app=os.path.join(path_to_vtm_software, "bin/DecoderAppStatic")
    +vtm_cfg=os.path.join(path_to_vtm_software, "cfg/encoder_intra_vtm.cfg")
     

If you want to see what the VTM is doing exactly, enable debugging output:

    -
    loglev=logging.DEBUG
    -# loglev=logging.INFO
    -log=quickLog("VTMEncoderDecoder", loglev) # VTMEncoderDecoder
    +
    loglev=logging.DEBUG
    +# loglev=logging.INFO
    +log=quickLog("VTMEncoderDecoder", loglev) # VTMEncoderDecoder
     

At each quality parameter in the loop, instantiate a VTMEncoderDecoder instead:

    -
    enc_dec = VTMEncoderDecoder(
    -    encoderApp=vtm_encoder_app,
    -    decoderApp=vtm_decoder_app,
    -    ffmpeg="ffmpeg",
    -    vtm_cfg=vtm_cfg,
    -    qp=47,
    -    cache="/tmp/bitstreams",
    -    scale=100,
    -    warn=True
    -)
    +
    enc_dec = VTMEncoderDecoder(
    +    encoderApp=vtm_encoder_app,
    +    decoderApp=vtm_decoder_app,
    +    ffmpeg="ffmpeg",
    +    vtm_cfg=vtm_cfg,
    +    qp=47,
    +    cache="/tmp/bitstreams",
    +    scale=100,
    +    warn=True
    +)
     
    VTMEncoderDecoder - WARNING - folder /tmp/bitstreams/100/47 exists already
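From here on the VTM case proceeds exactly like the CompressAI case: the enc_dec instance above simply replaces the CompressAIEncoderDecoder in the evaluation loop shown earlier. A condensed sketch, reusing the predictor, dataset and predictor_field objects defined earlier in this tutorial:

# evaluate with the VTM encoder/decoder, reusing the earlier pipeline objects
bpp = annexPredictions(
    predictors=[predictor],
    fo_dataset=dataset,
    encoder_decoder=enc_dec,            # the VTMEncoderDecoder instance
    predictor_fields=[predictor_field],
)
res = dataset.evaluate_detections(
    predictor_field,
    gt_field="detections",
    method="open-images",
    pos_label_field="positive_labels",
    neg_label_field="negative_labels",
    expand_pred_hierarchy=False,
    expand_gt_hierarchy=False,
)
print("bpp:", bpp, "mAP:", res.mAP())
dataset.delete_sample_fields(predictor_field)  # clean up as before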
    diff --git a/tutorials/evaluate_nb.html b/tutorials/evaluate_nb.html
    index 0cd41664..ea6313b2 100644
    --- a/tutorials/evaluate_nb.html
    +++ b/tutorials/evaluate_nb.html
    @@ -349,78 +349,78 @@ 

    Contents

    In this tutorial we evaluate mAP values for a dataset with Detectron2 and a deep-learning encoding model from the CompressAI library. We also show how to perform a baseline evaluation with VTM.

    -
    # common libs
    -import math, os, io, json, cv2, random, logging, pickle, datetime
    -import numpy as np
    -# torch
    -import torch
    -# images
    -from PIL import Image
    -import matplotlib.pyplot as plt
    -# compressai
    -from compressai.zoo import bmshj2018_factorized
    +
    # common libs
    +import math, os, io, json, cv2, random, logging, pickle, datetime
    +import numpy as np
    +# torch
    +import torch
    +# images
    +from PIL import Image
    +import matplotlib.pyplot as plt
    +# compressai
    +from compressai.zoo import bmshj2018_factorized
     
    -
    ## *** Detectron imports ***
    -import detectron2
    -from detectron2.utils.logger import setup_logger
    -setup_logger()
    +
    ## *** Detectron imports ***
    +import detectron2
    +from detectron2.utils.logger import setup_logger
    +setup_logger()
     
    -# import some common detectron2 utilities
    -from detectron2 import model_zoo
    -from detectron2.engine import DefaultPredictor
    -from detectron2.config import get_cfg
    -from detectron2.utils.visualizer import Visualizer
    -from detectron2.data import MetadataCatalog, DatasetCatalog
    +# import some common detectron2 utilities
    +from detectron2 import model_zoo
    +from detectron2.engine import DefaultPredictor
    +from detectron2.config import get_cfg
    +from detectron2.utils.visualizer import Visualizer
    +from detectron2.data import MetadataCatalog, DatasetCatalog
     
    -
    # CompressAI-Vision
    -from compressai_vision.conversion import FO2DetectronDataset # convert fiftyone dataset to Detectron2 dataset
    -from compressai_vision.conversion import detectron251 # convert Detectron2 results to fiftyone format
-from compressai_vision.evaluation.fo import annexPredictions # annex predictions from a predictor to the fiftyone dataset
    -from compressai_vision.evaluation.pipeline import CompressAIEncoderDecoder, VTMEncoderDecoder # a class that does encoding+decoding & returns the transformed image & bitrate
    -from compressai_vision.pipelines.remote_analysis.tools import confLogger, quickLog, getDataFile
    +
    # CompressAI-Vision
    +from compressai_vision.conversion import FO2DetectronDataset # convert fiftyone dataset to Detectron2 dataset
    +from compressai_vision.conversion import detectron251 # convert Detectron2 results to fiftyone format
+from compressai_vision.evaluation.fo import annexPredictions # annex predictions from a predictor to the fiftyone dataset
    +from compressai_vision.evaluation.pipeline import CompressAIEncoderDecoder, VTMEncoderDecoder # a class that does encoding+decoding & returns the transformed image & bitrate
    +from compressai_vision.pipelines.remote_analysis.tools import confLogger, quickLog, getDataFile
     
    -
    # fiftyone
    -import fiftyone as fo
    -import fiftyone.zoo as foz
    +
    # fiftyone
    +import fiftyone as fo
    +import fiftyone.zoo as foz
     
    -
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    -print(device)
    +
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    +print(device)
     
    cpu
     
    -
    ## MODEL A
    -model_name="COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml"
    -## look here:
    -## https://github.com/facebookresearch/detectron2/blob/main/MODEL_ZOO.md#faster-r-cnn
    -## for the line that says X101-FPN --> box AP is 43
    +
    ## MODEL A
    +model_name="COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml"
    +## look here:
    +## https://github.com/facebookresearch/detectron2/blob/main/MODEL_ZOO.md#faster-r-cnn
    +## for the line that says X101-FPN --> box AP is 43
     
    -## MODEL B
    -# model_name="COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
    +## MODEL B
    +# model_name="COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
     
    -
    # cfg encapsulates the model architecture & weights, also threshold parameter, metadata, etc.
    -cfg = get_cfg()
    -cfg.MODEL.DEVICE=device
    -# load config from a file:
    -cfg.merge_from_file(model_zoo.get_config_file(model_name))
    -# DO NOT TOUCH THRESHOLD WHEN DOING EVALUATION:
    -# too big a threshold will cut the smallest values & affect the precision(recall) curves & evaluation results
    -# the default value is 0.05
    -# value of 0.01 saturates the results (they don't change at lower values)
    -# cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
    -# get weights
    -cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_name)
    -print("expected input colorspace:", cfg.INPUT.FORMAT)
    -print("loaded datasets:", cfg.DATASETS)
    -model_dataset=cfg.DATASETS.TRAIN[0]
    -print("model was trained with", model_dataset)
    -model_meta=MetadataCatalog.get(model_dataset)
    +
    # cfg encapsulates the model architecture & weights, also threshold parameter, metadata, etc.
    +cfg = get_cfg()
    +cfg.MODEL.DEVICE=device
    +# load config from a file:
    +cfg.merge_from_file(model_zoo.get_config_file(model_name))
    +# DO NOT TOUCH THRESHOLD WHEN DOING EVALUATION:
    +# too big a threshold will cut the smallest values & affect the precision(recall) curves & evaluation results
    +# the default value is 0.05
    +# value of 0.01 saturates the results (they don't change at lower values)
    +# cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
    +# get weights
    +cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_name)
    +print("expected input colorspace:", cfg.INPUT.FORMAT)
    +print("loaded datasets:", cfg.DATASETS)
    +model_dataset=cfg.DATASETS.TRAIN[0]
    +print("model was trained with", model_dataset)
    +model_meta=MetadataCatalog.get(model_dataset)
     
    expected input colorspace: BGR
    @@ -433,18 +433,18 @@ 

    Contents

    model was trained with coco_2017_train
    -
    # model_meta.thing_classes # check class labels this was trained with
    +
    # model_meta.thing_classes # check class labels this was trained with
     
    -
    predictor = DefaultPredictor(cfg)
    +
    predictor = DefaultPredictor(cfg)
     

Get a handle to a dataset. We will be using the oiv6-mpeg-detection-v1 dataset. Please go through the CLI Tutorials in order to produce this dataset.

    -
    dataset = fo.load_dataset("oiv6-mpeg-detection-v1-dummy") # or use the dummy dataset for testing/debugging
    +
    dataset = fo.load_dataset("oiv6-mpeg-detection-v1-dummy") # or use the dummy dataset for testing/debugging
     
    -
    dataset
    +
    dataset
     
    Name:        oiv6-mpeg-detection-v1-dummy
    @@ -464,32 +464,32 @@ 

    Contents

    Set some loglevels

    -
    # loglev=logging.DEBUG
    -loglev=logging.INFO
    -quickLog("CompressAIEncoderDecoder", loglev)
    +
    # loglev=logging.DEBUG
    +loglev=logging.INFO
    +quickLog("CompressAIEncoderDecoder", loglev)
     
    <Logger CompressAIEncoderDecoder (INFO)>
     

    Get a list of labels in the dataset:

    -
    classes = dataset.distinct(
    -    "detections.detections.label"
    -)
    -print(classes)
    +
    classes = dataset.distinct(
    +    "detections.detections.label"
    +)
    +print(classes)
     
    ['airplane']
     
    -
    def per_class(results_obj):
    -    """helper function: take fiftyone/openimagev6 results object & spit
    -    out mAP breakdown as per class
    -    """
    -    d = {}
    -    for class_ in classes:
    -        d[class_] = results_obj.mAP([class_])
    -    return d
    +
    def per_class(results_obj):
    +    """helper function: take fiftyone/openimagev6 results object & spit
    +    out mAP breakdown as per class
    +    """
    +    d = {}
    +    for class_ in classes:
    +        d[class_] = results_obj.mAP([class_])
    +    return d
     

CompressAIEncoderDecoder is a subclass of EncoderDecoder,
@@ -504,48 +504,48 @@

    Contents

    image before the image is passed to the Detectron2 predictor.

    We run the bmshj2018_factorized model over various quality parameters:

    -
    params=[1] # debugging
    -# params=[1,2,3,4,5,6,7,8]
    +
    params=[1] # debugging
    +# params=[1,2,3,4,5,6,7,8]
     

    Detectron prediction results are saved during the run into the fiftyone (mongodb) database. Let’s define a unique name for the sample field where the detectron results are saved:

    -
    predictor_field='detectron-predictions'
    -
    -
    -
    xs=[]; ys=[]; maps=[]; # bpp, mAP values, mAP(s) per class
    -results=[] # complete results
    -for i in params:
    -    net = bmshj2018_factorized(quality=i, pretrained=True).eval().to(device)
    -    enc_dec = CompressAIEncoderDecoder(net, device=device)
    -    # note the EncoderDecoder instance here:
    -    # before the predictor is used, the image is crunched through the encoding/decoding process & the bitrate is recorded
    -    # you could substitute CompressAIEncoderDecoder with VTMEncoderDecoder if you'd like to (see also the end of this tutorial)
    -    print("running the detector at", i)
    -    bpp = annexPredictions(predictors=[predictor], fo_dataset=dataset, encoder_decoder=enc_dec, predictor_fields=[predictor_field])
    -    # .. now detectron's results are in each sample at the "detectron-predictions"  field
    -    res = dataset.evaluate_detections(
    -        predictor_field,
    -        gt_field="detections",
    -        method="open-images",
    -        pos_label_field="positive_labels",
    -        neg_label_field="negative_labels",
    -        expand_pred_hierarchy=False,
    -        expand_gt_hierarchy=False
    -    )
    -    results.append((i, bpp, res))
    -    # save to disk at each iteration as a backup just in case
    -    xs.append(bpp)
    -    ys.append(res.mAP())
    -    maps.append(per_class(res))
    -    with open("out.json","w") as f:
    -        f.write(json.dumps({
    -            "bpp" : xs,
    -            "map" : ys,
    -            "map_per_class" : maps
    -            }, indent=2))
    -print("ready!")
    +
    predictor_field='detectron-predictions'
    +
    +
    +
    xs=[]; ys=[]; maps=[]; # bpp, mAP values, mAP(s) per class
    +results=[] # complete results
    +for i in params:
    +    net = bmshj2018_factorized(quality=i, pretrained=True).eval().to(device)
    +    enc_dec = CompressAIEncoderDecoder(net, device=device)
    +    # note the EncoderDecoder instance here:
    +    # before the predictor is used, the image is crunched through the encoding/decoding process & the bitrate is recorded
    +    # you could substitute CompressAIEncoderDecoder with VTMEncoderDecoder if you'd like to (see also the end of this tutorial)
    +    print("running the detector at", i)
    +    bpp = annexPredictions(predictors=[predictor], fo_dataset=dataset, encoder_decoder=enc_dec, predictor_fields=[predictor_field])
    +    # .. now detectron's results are in each sample at the "detectron-predictions"  field
    +    res = dataset.evaluate_detections(
    +        predictor_field,
    +        gt_field="detections",
    +        method="open-images",
    +        pos_label_field="positive_labels",
    +        neg_label_field="negative_labels",
    +        expand_pred_hierarchy=False,
    +        expand_gt_hierarchy=False
    +    )
    +    results.append((i, bpp, res))
    +    # save to disk at each iteration as a backup just in case
    +    xs.append(bpp)
    +    ys.append(res.mAP())
    +    maps.append(per_class(res))
    +    with open("out.json","w") as f:
    +        f.write(json.dumps({
    +            "bpp" : xs,
    +            "map" : ys,
    +            "map_per_class" : maps
    +            }, indent=2))
    +print("ready!")
     
    running the detector at 1
    @@ -564,13 +564,13 @@ 

    Contents

    After the evaluation we can (and should!) remove the detectron results from the database:

    -
    dataset.delete_sample_fields(predictor_field)
    +
    dataset.delete_sample_fields(predictor_field)
     

    Load results

    -
    with open("out.json","r") as f:
    -    res=json.load(f)
    -print(res)
    +
    with open("out.json","r") as f:
    +    res=json.load(f)
    +print(res)
     
    {'bpp': [0.10060123042505593], 'map': [1.0], 'map_per_class': [{'airplane': 1.0}]}
    @@ -580,31 +580,31 @@ 

    Contents

CompressAIEncoderDecoder with VTMEncoderDecoder in order to produce the anchor/baseline results. Let’s first set some variables for the VTM program:

    -
    # NOTE: set path_to_vtm_software
    -vtm_encoder_app=os.path.join(path_to_vtm_software, "bin/EncoderAppStatic")
    -vtm_decoder_app=os.path.join(path_to_vtm_software, "bin/DecoderAppStatic")
    -vtm_cfg=os.path.join(path_to_vtm_software, "cfg/encoder_intra_vtm.cfg")
    +
    # NOTE: set path_to_vtm_software
    +vtm_encoder_app=os.path.join(path_to_vtm_software, "bin/EncoderAppStatic")
    +vtm_decoder_app=os.path.join(path_to_vtm_software, "bin/DecoderAppStatic")
    +vtm_cfg=os.path.join(path_to_vtm_software, "cfg/encoder_intra_vtm.cfg")
     

If you want to see what the VTM is doing exactly, enable debugging output:

    -
    loglev=logging.DEBUG
    -# loglev=logging.INFO
    -log=quickLog("VTMEncoderDecoder", loglev) # VTMEncoderDecoder
    +
    loglev=logging.DEBUG
    +# loglev=logging.INFO
    +log=quickLog("VTMEncoderDecoder", loglev) # VTMEncoderDecoder
     

At each quality parameter in the loop, instantiate a VTMEncoderDecoder instead:

    -
    enc_dec = VTMEncoderDecoder(
    -    encoderApp=vtm_encoder_app,
    -    decoderApp=vtm_decoder_app,
    -    ffmpeg="ffmpeg",
    -    vtm_cfg=vtm_cfg,
    -    qp=47,
    -    cache="/tmp/bitstreams",
    -    scale=100,
    -    warn=True
    -)
    +
    enc_dec = VTMEncoderDecoder(
    +    encoderApp=vtm_encoder_app,
    +    decoderApp=vtm_decoder_app,
    +    ffmpeg="ffmpeg",
    +    vtm_cfg=vtm_cfg,
    +    qp=47,
    +    cache="/tmp/bitstreams",
    +    scale=100,
    +    warn=True
    +)
     
    VTMEncoderDecoder - WARNING - folder /tmp/bitstreams/100/47 exists already
    diff --git a/tutorials/fiftyone.html b/tutorials/fiftyone.html
    index 586e9977..2ad672f7 100644
    --- a/tutorials/fiftyone.html
    +++ b/tutorials/fiftyone.html
    @@ -371,18 +371,18 @@ 

    Fiftyone and MongoDB

    Let’s take a closer look:

    -
    # image tool imports
    -from PIL import Image
    -import matplotlib.pyplot as plt
    +
    # image tool imports
    +from PIL import Image
    +import matplotlib.pyplot as plt
     
    -
    # fiftyone
    -import fiftyone as fo
    -import fiftyone.zoo as foz
    +
    # fiftyone
    +import fiftyone as fo
    +import fiftyone.zoo as foz
     

Let’s take a look at the datasets registered to fiftyone:

    -
    fo.list_datasets()
    +
    fo.list_datasets()
     
    ['mpeg-vcm-detection',
    @@ -394,19 +394,19 @@ 

    Fiftyone and MongoDB

    Let’s get a handle to a dataset:

    -
    dataset=fo.load_dataset("quickstart")
    +
    dataset=fo.load_dataset("quickstart")
     

    Let’s see how many samples we have in it:

    -
    len(dataset)
    +
    len(dataset)
     
    200
     

    Let’s take a look at the first sample:

    -
    sample=dataset.first()
    -print(sample)
    +
    sample=dataset.first()
    +print(sample)
     
    <Sample: {
    @@ -696,7 +696,7 @@ 

    Fiftyone and MongoDB

    Let’s load an image:

    -
    plt.imshow(Image.open(sample["filepath"]))
    +
    plt.imshow(Image.open(sample["filepath"]))
     
    <matplotlib.image.AxesImage at 0x7fdf5a7fc640>
    @@ -705,7 +705,7 @@ 

    Fiftyone and MongoDB

Let’s see a summary of the dataset and what kind of fields each sample has:

    -
    print(dataset)
    +
    print(dataset)
     
    Name:        quickstart
    @@ -731,14 +731,14 @@ 

    Fiftyone and MongoDB

Finally, here is a small recap/cheatsheet of selected fiftyone features

    -
    # Access by sample id
    -sample=dataset["634472860faf93a9a586c9c4"]
    +
    # Access by sample id
    +sample=dataset["634472860faf93a9a586c9c4"]
     
    -
# Search by a field value.  You might need this one with the open_images_id field.
    -from fiftyone import ViewField as F
    -tmpset=dataset[F("filepath") == dataset.first().filepath]
    -print(tmpset)
    +
# Search by a field value.  You might need this one with the open_images_id field.
    +from fiftyone import ViewField as F
    +tmpset=dataset[F("filepath") == dataset.first().filepath]
    +print(tmpset)
     
    Dataset:     quickstart
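A couple more view operations that often come in handy. This is a sketch: the "predictions" field and the 0.9 confidence threshold are just illustrative values that happen to exist in the quickstart dataset.

# random subset of 5 samples
view = dataset.take(5)
print(len(view))

# keep only high-confidence predicted detections
from fiftyone import ViewField as F
high_conf = dataset.filter_labels("predictions", F("confidence") > 0.9)
print(len(high_conf))

# interactive inspection in the browser
# session = fo.launch_app(dataset)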
    diff --git a/tutorials/fiftyone_nb.html b/tutorials/fiftyone_nb.html
    index 42feca9f..41f0b082 100644
    --- a/tutorials/fiftyone_nb.html
    +++ b/tutorials/fiftyone_nb.html
    @@ -377,18 +377,18 @@ 

    Contents

    sharing data and/or if you’re working in a supercomputing / grid environment.

    Let’s take a closer look:

    -
    # image tool imports
    -from PIL import Image
    -import matplotlib.pyplot as plt
    +
    # image tool imports
    +from PIL import Image
    +import matplotlib.pyplot as plt
     
    -
    # fiftyone
    -import fiftyone as fo
    -import fiftyone.zoo as foz
    +
    # fiftyone
    +import fiftyone as fo
    +import fiftyone.zoo as foz
     

Let’s take a look at the datasets registered to fiftyone:

    -
    fo.list_datasets()
    +
    fo.list_datasets()
     
    ['mpeg-vcm-detection',
    @@ -400,19 +400,19 @@ 

    Contents

    Let’s get a handle to a dataset:

    -
    dataset=fo.load_dataset("quickstart")
    +
    dataset=fo.load_dataset("quickstart")
     

    Let’s see how many samples we have in it:

    -
    len(dataset)
    +
    len(dataset)
     
    200
     

    Let’s take a look at the first sample:

    -
    sample=dataset.first()
    -print(sample)
    +
    sample=dataset.first()
    +print(sample)
     
    <Sample: {
    @@ -702,7 +702,7 @@ 

    Contents

    (instead of writing lots of intermediate files on the disk like with COCO API or with the tensorflow tools).

    Let’s load an image:

    -
    plt.imshow(Image.open(sample["filepath"]))
    +
    plt.imshow(Image.open(sample["filepath"]))
     
    <matplotlib.image.AxesImage at 0x7fdf5a7fc640>
    @@ -711,7 +711,7 @@ 

    Contents

    ../_images/fiftyone_nb_12_1.png

Let’s see a summary of the dataset and what kind of fields each sample has:

    -
    print(dataset)
    +
    print(dataset)
     
    Name:        quickstart
    @@ -737,14 +737,14 @@ 

    Contents

    documentation

Finally, here is a small recap/cheatsheet of selected fiftyone features

    -
    # Access by sample id
    -sample=dataset["634472860faf93a9a586c9c4"]
    +
    # Access by sample id
    +sample=dataset["634472860faf93a9a586c9c4"]
     
    -
# Search by a field value.  You might need this one with the open_images_id field.
    -from fiftyone import ViewField as F
    -tmpset=dataset[F("filepath") == dataset.first().filepath]
    -print(tmpset)
    +
# Search by a field value.  You might need this one with the open_images_id field.
    +from fiftyone import ViewField as F
    +tmpset=dataset[F("filepath") == dataset.first().filepath]
    +print(tmpset)
     
    Dataset:     quickstart
    diff --git a/tutorials/index.html b/tutorials/index.html
    index e53be9b4..8d99d19a 100644
    --- a/tutorials/index.html
    +++ b/tutorials/index.html
    @@ -400,42 +400,42 @@ 

Input file conversion
We convert this data into OpenImageV6 format and also register it into fiftyone.

    In this chapter, we create an evaluation dataset as defined by the MPEG-VCM working group

    -
    # common libs
    -import math, os, io, json, cv2, random, logging
    -import numpy as np
    -# images
    -from PIL import Image
    -import matplotlib.pyplot as plt
    +
    # common libs
    +import math, os, io, json, cv2, random, logging
    +import numpy as np
    +# images
    +from PIL import Image
    +import matplotlib.pyplot as plt
     
    -
    homie=os.path.expanduser("~")
    -print("your home path is", homie)
    -fodir=os.path.join(homie,'fiftyone')
    -print("fiftyone dowloads data by default to", fodir)
    -try:
    -    os.mkdir(fodir)
    -except FileExistsError:
    -    pass
    +
    homie=os.path.expanduser("~")
    +print("your home path is", homie)
    +fodir=os.path.join(homie,'fiftyone')
    +print("fiftyone dowloads data by default to", fodir)
    +try:
    +    os.mkdir(fodir)
    +except FileExistsError:
    +    pass
     
    your home path is /home/sampsa
fiftyone downloads data by default to /home/sampsa/fiftyone
     
    -
    # fiftyone
    -import fiftyone as fo
    -import fiftyone.zoo as foz
    +
    # fiftyone
    +import fiftyone as fo
    +import fiftyone.zoo as foz
     
    -
    # CompressAI-Vision
    -from compressai_vision.conversion import MPEGVCMToOpenImageV6, imageIdFileList
    +
    # CompressAI-Vision
    +from compressai_vision.conversion import MPEGVCMToOpenImageV6, imageIdFileList
     

We expect that you have downloaded the correct images and segmentation masks into the open-images-v6 folder (as instructed in the previous chapter)

    -
    dir_=os.path.join(fodir,"open-images-v6")
    -print("contents of", dir_,":")
    -!tree --filelimit=10 $dir_ | cat
    +
    dir_=os.path.join(fodir,"open-images-v6")
    +print("contents of", dir_,":")
    +!tree --filelimit=10 $dir_ | cat
     
    contents of /home/sampsa/fiftyone/open-images-v6 :
    @@ -470,36 +470,36 @@ 

Input file conversion
detection_validation_input_5k.lst = list of images used

    -
    # TODO: define path_to_mpeg_vcm_files
    -path_to_images=os.path.join(fodir,"open-images-v6/validation/data")
    +
    # TODO: define path_to_mpeg_vcm_files
    +path_to_images=os.path.join(fodir,"open-images-v6/validation/data")
     
    -list_file=os.path.join(path_to_mpeg_vcm_files, "detection_validation_input_5k.lst")
    -bbox_csv_file=os.path.join(path_to_mpeg_vcm_files, "detection_validation_5k_bbox.csv")
    -validation_csv_file=os.path.join(path_to_mpeg_vcm_files, "detection_validation_labels_5k.csv")
    +list_file=os.path.join(path_to_mpeg_vcm_files, "detection_validation_input_5k.lst")
    +bbox_csv_file=os.path.join(path_to_mpeg_vcm_files, "detection_validation_5k_bbox.csv")
    +validation_csv_file=os.path.join(path_to_mpeg_vcm_files, "detection_validation_labels_5k.csv")
     
    -assert(os.path.exists(bbox_csv_file)), "can't find bbox file"
    -assert(os.path.exists(validation_csv_file)), "can't find labels file"
    -assert(os.path.exists(path_to_images)), "can't find image directory"
    +assert(os.path.exists(bbox_csv_file)), "can't find bbox file"
    +assert(os.path.exists(validation_csv_file)), "can't find labels file"
    +assert(os.path.exists(path_to_images)), "can't find image directory"
     

Now we convert the proprietary MPEG-VCM annotation format into a proper OpenImageV6-format dataset and place it into ~/fiftyone/mpeg-vcm-detection

    First, remove any previously imported stuff:

    -
    !rm -rf ~/fiftyone/mpeg-vcm-*
    +
    !rm -rf ~/fiftyone/mpeg-vcm-*
     
    -
    MPEGVCMToOpenImageV6(
    -    validation_csv_file=validation_csv_file,
    -    list_file=list_file,
    -    bbox_csv_file=bbox_csv_file,
    -    output_directory=os.path.join(fodir,"mpeg-vcm-detection"),
    -    data_dir=path_to_images
    -)
    +
    MPEGVCMToOpenImageV6(
    +    validation_csv_file=validation_csv_file,
    +    list_file=list_file,
    +    bbox_csv_file=bbox_csv_file,
    +    output_directory=os.path.join(fodir,"mpeg-vcm-detection"),
    +    data_dir=path_to_images
    +)
     

Let’s see what we got:

    -
    !tree --filelimit=10 ~/fiftyone/mpeg-vcm-detection | cat
    +
    !tree --filelimit=10 ~/fiftyone/mpeg-vcm-detection | cat
     
    /home/sampsa/fiftyone/mpeg-vcm-detection
    @@ -521,46 +521,46 @@ 

Input file conversion
data -> ~/fiftyone/open-images-v6/validation/data)

The only thing left to do is to register this OpenImageV6-formatted dataset into fiftyone:

    -
    # remove the dataset in the case it was already registered in fiftyone
    -try:
    -    fo.delete_dataset("mpeg-vcm-detection")
    -except ValueError as e:
    -    print("could not delete because of", e)
    +
    # remove the dataset in the case it was already registered in fiftyone
    +try:
    +    fo.delete_dataset("mpeg-vcm-detection")
    +except ValueError as e:
    +    print("could not delete because of", e)
     
    -
    dataset_type = fo.types.OpenImagesV6Dataset
    -dataset_dir = os.path.join(fodir,"mpeg-vcm-detection")
    -dataset = fo.Dataset.from_dir(
    -    dataset_dir=dataset_dir,
    -    dataset_type=dataset_type,
    -    label_types=("detections","classifications"),
    -    load_hierarchy=False,
    -    name="mpeg-vcm-detection",
    -    image_ids=imageIdFileList(list_file)
    -)
    +
    dataset_type = fo.types.OpenImagesV6Dataset
    +dataset_dir = os.path.join(fodir,"mpeg-vcm-detection")
    +dataset = fo.Dataset.from_dir(
    +    dataset_dir=dataset_dir,
    +    dataset_type=dataset_type,
    +    label_types=("detections","classifications"),
    +    load_hierarchy=False,
    +    name="mpeg-vcm-detection",
    +    image_ids=imageIdFileList(list_file)
    +)
     
    100% |███████████████| 5000/5000 [16.8s elapsed, 0s remaining, 290.4 samples/s]
     
    -
dataset.persistent=True # without this, your dataset will disappear!
    +
dataset.persistent=True # without this, your dataset will disappear!
     
    -
    ## now, in the future, just do
    -dataset = fo.load_dataset("mpeg-vcm-detection")
    +
    ## now, in the future, just do
    +dataset = fo.load_dataset("mpeg-vcm-detection")
     

Finally, let’s also create a dummy dataset for debugging and testing, with only one sample:

    -
    try:
    -    fo.delete_dataset("mpeg-vcm-detection-dummy")
    -except ValueError:
    -    print("no dummmy dataset yet..")
    -dummy_dataset=fo.Dataset("mpeg-vcm-detection-dummy")
    -for sample in dataset[0:1]:
    -    dummy_dataset.add_sample(sample)
    -dummy_dataset.persistent=True
    -print("dummy dataset ok")
    +
    try:
    +    fo.delete_dataset("mpeg-vcm-detection-dummy")
    +except ValueError:
    +    print("no dummmy dataset yet..")
    +dummy_dataset=fo.Dataset("mpeg-vcm-detection-dummy")
    +for sample in dataset[0:1]:
    +    dummy_dataset.add_sample(sample)
    +dummy_dataset.persistent=True
    +print("dummy dataset ok")
     
    dummy dataset ok
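As a quick optional sanity check, the dummy dataset should now contain exactly one persistent sample:

# verify the dummy dataset
print(len(dummy_dataset))             # -> 1
print(dummy_dataset.first().filepath)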