% Hadi Amirpour
% Encoding: utf-8

@Article{Taraghi2021a,
  author    = {Babak Taraghi and Minh Nguyen and Hadi Amirpour and Christian Timmerer},
  journal   = {IEEE Access},
  title     = {{Intense: In-Depth Studies on Stall Events and Quality Switches and Their Impact on the Quality of Experience in {HTTP} Adaptive Streaming}},
  year      = {2021},
  issn      = {2169-3536},
  month     = aug,
  pages     = {118087--118098},
  volume    = {9},
  abstract  = {With the recent growth of multimedia traffic over the Internet and emerging multimedia streaming service providers, improving Quality of Experience (QoE) for HTTP Adaptive Streaming (HAS) becomes more important. Alongside other factors, such as the media quality, HAS relies on the performance of the media player’s Adaptive Bitrate (ABR) algorithm to optimize QoE in multimedia streaming sessions. QoE in HAS suffers from weak or unstable internet connections and suboptimal ABR decisions. As a result of imperfect adaptiveness to the characteristics and conditions of the internet connection, stall events and quality level switches could occur and with different durations that negatively affect the QoE. In this paper, we address various identified open issues related to the QoE for HAS, notably (i) the minimum noticeable duration for stall events in HAS; (ii) the correlation between the media quality and the impact of stall events on QoE; (iii) the end-user preference regarding multiple shorter stall events versus a single longer stall event; and (iv) the end-user preference of media quality switches over stall events. Therefore, we have studied these open issues from both objective and subjective evaluation perspectives and presented the correlation between the two types of evaluations. The findings documented in this paper can be used as a baseline for improving ABR algorithms and policies in HAS.},
  doi       = {10.1109/access.2021.3107619},
  keywords  = {Crowdsourcing, HTTP adaptive streaming, quality of experience, quality switches, stall events, subjective evaluation, objective evaluation},
  publisher = {Institute of Electrical and Electronics Engineers (IEEE)},
  url       = {https://ieeexplore.ieee.org/document/9521894},
}

@InCollection{Shams2021,
  author    = {Nakisa Shams and Hadi Amirpour and Christian Timmerer and Mohammad Ghanbari},
  booktitle = {Proceedings of Sixth International Congress on Information and Communication Technology},
  publisher = {Springer Singapore},
  title     = {{A Channel Allocation Algorithm for Cognitive Radio Users Based on Channel State Predictors}},
  year      = {2021},
  month     = sep,
  pages     = {711--719},
  volume    = {235},
  abstract  = {Cognitive radio networks can efficiently manage the radio spectrum by utilizing the spectrum holes for secondary users in licensed frequency bands. The energy that is used to detect spectrum holes can be reduced considerably by predicting them. However, collisions can occur either between a primary user and secondary users or among the secondary users themselves. This paper introduces a centralized channel allocation algorithm (CCAA) in a scenario with multiple secondary users to control primary and secondary collisions. The proposed allocation algorithm, which uses a channel state predictor (CSP), provides good performance with fairness among the secondary users while they have minimal interference with the primary user. The simulation results show that the probability of a wrong prediction of an idle channel state in a multi-channel system is less than 0.9\%. The channel state prediction saves the sensing energy by 73\%, and the utilization of the spectrum can be improved by more than 77\%.},
  doi       = {10.1007/978-981-16-2380-6_62},
  keywords  = {Cognitive radio, Neural networks, Prediction, Idle channel},
  url       = {https://link.springer.com/chapter/10.1007/978-981-16-2380-6_62},
}

@InProceedings{Pasandi2021a,
  author    = {Hannaneh Barahouei Pasandi and Tamer Nadeem and Hadi Amirpour and Christian Timmerer},
  booktitle = {Proceedings of the 27th Annual International Conference on Mobile Computing and Networking},
  title     = {{A cross-layer approach for supporting real-time multi-user video streaming over WLANs}},
  year      = {2021},
  month     = oct,
  pages     = {849--851},
  publisher = {ACM},
  abstract  = {MU-MIMO is a high-speed technique in IEEE 802.11ac and upcoming 802.11ax technologies that improves spectral efficiency by allowing concurrent communication between one Access Point and multiple users. In this paper, we present MuVIS, a novel framework that proposes MU-MIMO-aware optimization for multi-user multimedia applications over IEEE 802.11ac/ax. Taking a cross-layer approach, MuVIS first optimizes the MU-MIMO user group selection for the users with the same characteristics in the PHY/MAC layer. It then optimizes the video bitrate for each group accordingly. We present our design and its evaluation on smartphones and laptops over 802.11ac WiFi.},
  doi       = {10.1145/3447993.3482868},
  url       = {https://dl.acm.org/doi/abs/10.1145/3447993.3482868},
}

@InProceedings{Pasandi2021,
  author    = {Hannaneh Barahouei Pasandi and Hadi Amirpour and Tamer Nadeem and Christian Timmerer},
  booktitle = {Proceedings of the Workshop on Design, Deployment, and Evaluation of Network-assisted Video Streaming},
  title     = {{Learning-driven MU-MIMO Grouping for Multi-User Multimedia Applications Over Commodity WiFi}},
  year      = {2021},
  month     = dec,
  pages     = {15--21},
  publisher = {ACM},
  abstract  = {MU-MIMO is a high-speed technique in IEEE 802.11ac and upcoming ax technologies that improves spectral efficiency by allowing concurrent communication between one Access Point and multiple users. In this paper, we present LATTE, a novel framework that proposes MU-MIMO-aware optimization for multi-user multimedia applications over IEEE 802.11ac/ax. Taking a cross-layer approach, LATTE first optimizes the MU-MIMO user group selection for the users with the same characteristics in the PHY/MAC layer. It then optimizes the video bitrate for each group accordingly. We present our design and its evaluation on smartphones and laptops over 802.11ac WiFi. Our experimental evaluations indicate that LATTE can outperform other video rate adaptation algorithms.},
  doi       = {10.1145/3488662.3493828},
  url       = {https://dl.acm.org/doi/10.1145/3488662.3493828},
}

@InProceedings{Menon2021a,
  author    = {Vignesh V Menon and Hadi Amirpour and Mohammad Ghanbari and Christian Timmerer},
  booktitle = {2021 IEEE International Conference on Image Processing (ICIP)},
  title     = {{Efficient Content-Adaptive Feature-Based Shot Detection for HTTP Adaptive Streaming}},
  year      = {2021},
  month     = sep,
  pages     = {2174--2178},
  publisher = {IEEE},
  abstract  = {Video delivery over the Internet has been becoming a commodity in recent years, owing to the widespread use of Dynamic Adaptive Streaming over HTTP (DASH). The DASH specification defines a hierarchical data model for Media Presentation Descriptions (MPDs) in terms of segments. This paper focuses on segmenting video into multiple shots for encoding in Video on Demand (VoD) HTTP Adaptive Streaming (HAS) applications. Therefore, we propose a novel Discrete Cosine Transform (DCT) feature-based shot detection and successive elimination algorithm for shot detection and compare it against the default shot detection algorithm of the x265 implementation of the High Efficiency Video Coding (HEVC) standard. Our experimental results demonstrate that our proposed feature-based pre-processor has a recall rate of 25\% and an F-measure of 20\% greater than the benchmark algorithm for shot detection.},
  doi       = {10.1109/icip42928.2021.9506092},
  keywords  = {HTTP Adaptive Streaming, Video-on-Demand, Shot detection, multi-shot encoding},
  url       = {https://ieeexplore.ieee.org/document/9506092},
}

@InProceedings{Menon2021,
  author    = {Vignesh V Menon and Hadi Amirpour and Christian Timmerer and Mohammad Ghanbari},
  booktitle = {2021 Picture Coding Symposium (PCS)},
  title     = {{Efficient Multi-Encoding Algorithms for HTTP Adaptive Bitrate Streaming}},
  year      = {2021},
  month     = jun,
  pages     = {1--5},
  publisher = {IEEE},
  abstract  = {Since video accounts for the majority of today’s internet traffic, the popularity of HTTP Adaptive Streaming (HAS) is increasing steadily. In HAS, each video is encoded at multiple bitrates and spatial resolutions (i.e., representations) to adapt to a heterogeneity of network conditions, device characteristics, and end-user preferences. Most of the streaming services utilize cloud-based encoding techniques which enable a fully parallel encoding process to speed up the encoding and consequently to reduce the overall time complexity. State-of-the-art approaches further improve the encoding process by utilizing encoder analysis information from already encoded representation(s) to improve the encoding time complexity of the remaining representations. In this paper, we investigate various multi-encoding algorithms (i.e., multi-rate and multi-resolution) and propose novel multi-encoding algorithms for large-scale HTTP Adaptive Streaming deployments. Experimental results demonstrate that the proposed multi-encoding algorithm optimized for the highest compression efficiency reduces the overall encoding time by 39\% with a 1.5\% bitrate increase compared to stand-alone encodings. Its optimized version for the highest time savings reduces the overall encoding time by 50\% with a 2.6\% bitrate increase compared to stand-alone encodings.},
  doi       = {10.1109/pcs50896.2021.9477499},
  keywords  = {HTTP Adaptive Streaming, HEVC, Multi-rate Encoding, Multi-encoding},
  url       = {https://ieeexplore.ieee.org/document/9477499},
}

@InProceedings{Farahani2021a,
  author    = {Reza Farahani and Farzad Tashtarian and Hadi Amirpour and Christian Timmerer and Mohammad Ghanbari and Hermann Hellwagner},
  booktitle = {2021 IEEE 46th Conference on Local Computer Networks (LCN)},
  title     = {{CSDN: CDN-Aware QoE Optimization in SDN-Assisted HTTP Adaptive Video Streaming}},
  year      = {2021},
  month     = oct,
  pages     = {525--532},
  publisher = {IEEE},
  abstract  = {Recent studies have revealed that network-assisted techniques, by providing a comprehensive view of the network, improve HTTP Adaptive Streaming (HAS) system performance significantly. This paper leverages the capability of Software-Defined Networking, Network Function Virtualization, and edge computing to introduce a CDN-Aware QoE Optimization in SDN-Assisted Adaptive Video Streaming (CSDN) framework. We employ virtualized edge entities to collect various information items and run an optimization model with a new server/segment selection approach in a time-slotted fashion to serve the clients’ requests by selecting optimal cache servers. In case of a cache miss, a client’s request is served by an optimal replacement quality from a cache server, by a quality transcoded from an optimal replacement quality at the edge, or by the originally requested quality from the origin server. Comprehensive experiments conducted on a large-scale testbed demonstrate that CSDN outperforms other approaches in terms of the users’ QoE and network utilization.},
  doi       = {10.1109/lcn52139.2021.9524970},
  keywords  = {Dynamic Adaptive Streaming over HTTP (DASH), Edge Computing, Network-Assisted Video Streaming, Quality of Experience (QoE), Software Defined Networking (SDN), Network Function Virtualization (NFV), Video Transcoding, Content Delivery Network (CDN)},
  url       = {https://ieeexplore.ieee.org/document/9524970},
}

@InProceedings{Erfanian2021b,
  author    = {Alireza Erfanian and Hadi Amirpour and Farzad Tashtarian and Christian Timmerer and Hermann Hellwagner},
  booktitle = {Proceedings of the Workshop on Design, Deployment, and Evaluation of Network-assisted Video Streaming},
  title     = {{LwTE-Live: Light-weight Transcoding at the Edge for Live Streaming}},
  year      = {2021},
  month     = dec,
  pages     = {22--28},
  publisher = {ACM},
  abstract  = {Live video streaming is widely embraced in video services, and its applications have attracted much attention in recent years. The increased number of users demanding high quality (e.g., 4K resolution) live videos increases the bandwidth utilization in the backhaul network. To decrease bandwidth utilization in HTTP Adaptive Streaming (HAS), in on-the-fly transcoding approaches, only the highest bitrate representation is delivered to the edge, and other representations are generated by transcoding at the edge. However, this approach is inefficient due to the high transcoding cost. In this paper, we propose a light-weight transcoding at the edge method for live applications, LwTE-Live, to decrease the bandwidth utilization and the overall live streaming cost. During the encoding processes at the origin server, the optimal encoding decisions are saved as metadata and the metadata replaces the corresponding representation in the bitrate ladder. The significantly reduced size of the metadata compared to its corresponding representation decreases the bandwidth utilization. The extracted metadata is then utilized at the edge to decrease the transcoding time. We formulate the problem as a Mixed-Binary Linear Programming (MBLP) model to optimize the live streaming cost, including the bandwidth and computation costs. We compare the proposed model with state-of-the-art approaches, and the experimental results show that our proposed method saves the cost and backhaul bandwidth utilization up to 34\% and 45\%, respectively.},
  doi       = {10.1145/3488662.3493829},
  url       = {https://dl.acm.org/doi/10.1145/3488662.3493829},
}

@Article{Erfanian2021a,
  author    = {Alireza Erfanian and Hadi Amirpour and Farzad Tashtarian and Christian Timmerer and Hermann Hellwagner},
  journal   = {IEEE Access},
  title     = {{LwTE: Light-Weight Transcoding at the Edge}},
  year      = {2021},
  issn      = {2169-3536},
  month     = aug,
  pages     = {112276--112289},
  volume    = {9},
  abstract  = {Due to the growing demand for video streaming services, providers have to deal with increasing resource requirements for increasingly heterogeneous environments. To mitigate this problem, many works have been proposed which aim to (i) improve cloud/edge caching efficiency, (ii) use computation power available in the cloud/edge for on-the-fly transcoding, and (iii) optimize the trade-off among various cost parameters, e.g., storage, computation, and bandwidth. In this paper, we propose LwTE, a novel Light-weight Transcoding approach at the Edge, in the context of HTTP Adaptive Streaming (HAS). During the encoding process of a video segment at the origin side, computationally intense search processes are going on. The main idea of LwTE is to store the optimal results of these search processes as metadata for each video bitrate and reuse them at the edge servers to reduce the required time and computational resources for on-the-fly transcoding. LwTE enables us to store only the highest bitrate plus corresponding metadata (of very small size) for unpopular video segments/bitrates. In this way, in addition to the significant reduction in bandwidth and storage consumption, the required time for on-the-fly transcoding of a requested segment is remarkably decreased by utilizing its corresponding metadata; unnecessary search processes are avoided. Popular video segments/bitrates are being stored. We investigate our approach for Video-on-Demand (VoD) streaming services by optimizing storage and computation (transcoding) costs at the edge servers and then compare it to conventional methods (store all bitrates, partial transcoding). The results indicate that our approach reduces the transcoding time by at least 80\% and decreases the aforementioned costs by 12\% to 70\% compared to the state-of-the-art approaches.},
  doi       = {10.1109/access.2021.3102633},
  keywords  = {Video Streaming, transcoding, video on demand, edge computing},
  publisher = {Institute of Electrical and Electronics Engineers (IEEE)},
  url       = {https://ieeexplore.ieee.org/document/9507473},
}

@Article{Cetinkaya2021a,
  author    = {Ekrem Cetinkaya and Hadi Amirpour and Christian Timmerer and Mohammad Ghanbari},
  journal   = {IEEE Open Journal of Signal Processing},
  title     = {{Fast Multi-Resolution and Multi-Rate Encoding for HTTP Adaptive Streaming Using Machine Learning}},
  year      = {2021},
  issn      = {2644-1322},
  month     = jun,
  pages     = {1--12},
  abstract  = {Video streaming applications keep getting more attention over the years, and HTTP Adaptive Streaming (HAS) became the de-facto solution for video delivery over the Internet. In HAS, each video is encoded at multiple quality levels and resolutions (i.e., representations) to enable adaptation of the streaming session to viewing and network conditions of the client. This requirement brings encoding challenges along with it, e.g., a video source should be encoded efficiently at multiple bitrates and resolutions. Fast multi-rate encoding approaches aim to address this challenge of encoding multiple representations from a single video by re-using information from already encoded representations. In this paper, a convolutional neural network is used to speed up both multi-rate and multi-resolution encoding for HAS. For multi-rate encoding, the lowest bitrate representation is chosen as the reference. For multi-resolution encoding, the highest bitrate from the lowest resolution representation is chosen as the reference. Pixel values from the target resolution and encoding information from the reference representation are used to predict Coding Tree Unit (CTU) split decisions in High-Efficiency Video Coding (HEVC) for dependent representations. Experimental results show that the proposed method for multi-rate encoding can reduce the overall encoding time by 15.08\% and parallel encoding time by 41.26\%, with a 0.89\% bitrate increase compared to the HEVC reference software. Simultaneously, the proposed method for multi-resolution encoding can reduce the encoding time by 46.27\% for the overall encoding and 27.71\% for the parallel encoding on average with a 2.05\% bitrate increase.},
  doi       = {10.1109/ojsp.2021.3078657},
  keywords  = {HTTP Adaptive Streaming, HEVC, Multirate Encoding, Machine Learning},
  publisher = {Institute of Electrical and Electronics Engineers (IEEE)},
  url       = {https://ieeexplore.ieee.org/document/9427195},
}

@Article{Cetinkaya2021,
  author    = {Ekrem Cetinkaya and Hadi Amirpour and Mohammad Ghanbari and Christian Timmerer},
  journal   = {Signal Processing: Image Communication},
  title     = {{CTU depth decision algorithms for HEVC: A survey}},
  year      = {2021},
  issn      = {0923-5965},
  month     = nov,
  pages     = {116442},
  volume    = {99},
  abstract  = {High Efficiency Video Coding (HEVC) surpasses its predecessors in encoding efficiency by introducing new coding tools at the cost of an increased encoding time-complexity. The Coding Tree Unit (CTU) is the main building block used in HEVC. In the HEVC standard, frames are divided into CTUs with the predetermined size of up to 64 × 64 pixels. Each CTU is then divided recursively into a number of equally sized square areas, known as Coding Units (CUs). Although this diversity of frame partitioning increases encoding efficiency, it also causes an increase in the time complexity due to the increased number of ways to find the optimal partitioning. To address this complexity, numerous algorithms have been proposed to eliminate unnecessary searches during partitioning CTUs by exploiting the correlation in the video. In this paper, existing CTU depth decision algorithms for HEVC are surveyed. These algorithms are categorized into two groups, namely statistics and machine learning approaches. Statistics approaches are further subdivided into neighboring and inherent approaches. Neighboring approaches exploit the similarity between adjacent CTUs to limit the depth range of the current CTU, while inherent approaches use only the available information within the current CTU. Machine learning approaches try to extract and exploit similarities implicitly. Traditional methods like support vector machines or random forests use manually selected features, while recently proposed deep learning methods extract features during training. Finally, this paper discusses extending these methods to more recent video coding formats such as Versatile Video Coding (VVC) and AOMedia Video 1 (AV1).},
  doi       = {10.1016/j.image.2021.116442},
  keywords  = {HEVC, Coding tree unit, Complexity, CTU partitioning, Statistics, Machine learning},
  publisher = {Elsevier BV},
  url       = {https://www.sciencedirect.com/science/article/pii/S0923596521002113},
}

@InCollection{Amirpourazarian2021b,
  author    = {Hadi Amirpour and Ekrem Cetinkaya and Christian Timmerer and Mohammad Ghanbari},
  booktitle = {Proceedings of the 27th International Conference on Multimedia Modeling (MMM 2021)},
  publisher = {Springer International Publishing},
  title     = {{Towards Optimal Multirate Encoding for {HTTP} Adaptive Streaming}},
  year      = {2021},
  month     = jan,
  volume    = {12572},
  pages     = {469--480},
  series    = {Lecture Notes in Computer Science},
  abstract  = {HTTP Adaptive Streaming (HAS) enables high quality streaming of video contents. In HAS, videos are divided into short intervals called segments, and each segment is encoded at various quality/bitrates to adapt to the available bandwidth. Multiple encodings of the same content impose high cost for video content providers. To reduce the time-complexity of encoding multiple representations, state-of-the-art methods typically encode the highest quality representation first and reuse the information gathered during its encoding to accelerate the encoding of the remaining representations. As encoding the highest quality representation requires the highest time-complexity compared to the lower quality representations, it would be a bottleneck in parallel encoding scenarios and the overall time-complexity will be limited to the time-complexity of the highest quality representation. In this paper and to address this problem, we consider all representations from the highest to the lowest quality representation as a potential, single reference to accelerate the encoding of the other, dependent representations. We formulate a set of encoding modes and assess their performance in terms of BD-Rate and time-complexity, using both VMAF and PSNR as objective metrics. Experimental results show that encoding a middle quality representation as a reference, can significantly reduce the maximum encoding complexity and hence it is an efficient way of encoding multiple representations in parallel. Based on this fact, a fast multirate encoding method is proposed which utilizes depth and prediction mode of a middle quality representation to accelerate the encoding of the dependent representations.},
  doi       = {10.1007/978-3-030-67832-6_38},
  keywords  = {HEVC, Video Encoding, Multirate Encoding, DASH},
  url       = {https://link.springer.com/chapter/10.1007/978-3-030-67832-6_38},
}

@InProceedings{Amirpourazarian2021a,
  author    = {Hadi Amirpour and Christian Timmerer and Mohammad Ghanbari},
  booktitle = {2021 Data Compression Conference (DCC)},
  title     = {{SLFC: Scalable Light Field Coding}},
  year      = {2021},
  month     = mar,
  pages     = {43--52},
  publisher = {IEEE},
  abstract  = {Light field imaging enables some post-processing capabilities like refocusing, changing view perspective, and depth estimation. As light field images are represented by multiple views they contain a huge amount of data that makes compression inevitable. Although there are some proposals to efficiently compress light field images, their main focus is on encoding efficiency. However, some important functionalities such as viewpoint and quality scalabilities, random access, and uniform quality distribution have not been addressed adequately. In this paper, an efficient light field image compression method based on a deep neural network is proposed, which classifies multiple views into various layers. In each layer, the target view is synthesized from the available views of previously encoded/decoded layers using a deep neural network. This synthesized view is then used as a virtual reference for the target view inter-coding. In this way, random access to an arbitrary view is provided. Moreover, uniform quality distribution among multiple views is addressed. In higher bitrates where random access to an arbitrary view is more crucial, the required bitrate to access the requested view is minimized.},
  doi       = {10.1109/dcc50243.2021.00012},
  keywords  = {Light field, Compression, Scalable, Random Access},
  url       = {https://ieeexplore.ieee.org/document/9418753},
}

@InProceedings{Amirpourazarian2021,
  author    = {Hadi Amirpour and Christian Timmerer and Mohammad Ghanbari},
  booktitle = {2021 IEEE International Conference on Multimedia and Expo (ICME)},
  title     = {{PSTR: Per-Title Encoding Using Spatio-Temporal Resolutions}},
  year      = {2021},
  month     = jun,
  pages     = {1--6},
  publisher = {IEEE},
  abstract  = {Current per-title encoding schemes encode the same video content (or snippets/subsets thereof) at various bitrates and spatial resolutions to find an optimal bitrate ladder for each video content. Compared to traditional approaches, in which a predefined, content-agnostic ("fit-to-all") encoding ladder is applied to all video contents, per-title encoding can result in (i) a significant decrease of storage and delivery costs and (ii) an increase in the Quality of Experience (QoE). In the current per-title encoding schemes, the bitrate ladder is optimized using only spatial resolutions, while we argue that with the emergence of high framerate videos, this principle can be extended to temporal resolutions as well. In this paper, we improve the per-title encoding for each content using spatio-temporal resolutions. Experimental results show that our proposed approach doubles the performance of bitrate saving by considering both temporal and spatial resolutions compared to considering only spatial resolutions.},
  doi       = {10.1109/icme51207.2021.9428247},
  keywords  = {Bitrate ladder, per-title encoding, framerate, spatial resolution},
  url       = {https://ieeexplore.ieee.org/document/9428247},
}

@InProceedings{Amirpour2021b,
  author    = {Hadi Amirpour and Raimund Schatz and Christian Timmerer and Mohammad Ghanbari},
  booktitle = {2021 International Conference on Visual Communications and Image Processing (VCIP)},
  title     = {{On the Impact of Viewing Distance on Perceived Video Quality}},
  year      = {2021},
  month     = dec,
  pages     = {1--5},
  publisher = {IEEE},
  abstract  = {Due to the growing importance of optimizing the quality and efficiency of video streaming delivery, accurate assessment of user-perceived video quality becomes increasingly important. However, due to the wide range of viewing distances encountered in real-world viewing settings, the perceived video quality can vary significantly in everyday viewing situations. In this paper, we investigate and quantify the influence of viewing distance on perceived video quality. A subjective experiment was conducted with full HD sequences at three different fixed viewing distances, with each video sequence being encoded at three different quality levels. Our study results confirm that the viewing distance has a significant influence on the quality assessment. In particular, they show that an increased viewing distance generally leads to increased perceived video quality, especially at low media encoding quality levels. In this context, we also provide an estimation of potential bitrate savings that knowledge of actual viewing distance would enable in practice. Since current objective video quality metrics do not systematically take into account viewing distance, we also analyze and quantify the influence of viewing distance on the correlation between objective and subjective metrics. Our results confirm the need for distance-aware objective metrics when the accurate prediction of perceived video quality in real-world environments is required.},
  doi       = {10.1109/vcip53242.2021.9675431},
  keywords  = {Measurement, Image coding, Visual communication, Video sequences, Estimation, Streaming media, Media, video streaming, QoE, viewing distance, subjective testing},
  url       = {https://ieeexplore.ieee.org/document/9675431},
}

@InProceedings{Amirpour2021a,
  author    = {Hadi Amirpour and Hannaneh Barahouei Pasandi and Christian Timmerer and Mohammad Ghanbari},
  booktitle = {2021 International Conference on Visual Communications and Image Processing (VCIP)},
  title     = {{Improving Per-title Encoding for HTTP Adaptive Streaming by Utilizing Video Super-resolution}},
  year      = {2021},
  month     = dec,
  pages     = {1--5},
  publisher = {IEEE},
  abstract  = {In per-title encoding, to optimize a bitrate ladder over spatial resolution, each video segment is downscaled to a set of spatial resolutions, and they are all encoded at a given set of bitrates. To find the highest quality resolution for each bitrate, the low-resolution encoded videos are upscaled to the original resolution, and a convex hull is formed based on the scaled qualities. Deep learning-based video super-resolution (VSR) approaches show a significant gain over traditional upscaling approaches, and they are becoming more and more efficient over time. This paper improves the per-title encoding over the upscaling methods by using deep neural network-based VSR algorithms. Utilizing a VSR algorithm by improving the quality of low-resolution encodings can improve the convex hull. As a result, it will lead to an improved bitrate ladder. To avoid bandwidth wastage at perceptually lossless bitrates, a maximum threshold for the quality is set, and encodings beyond it are eliminated from the bitrate ladder. Similarly, a minimum threshold is set to avoid low-quality video delivery. The encodings between the maximum and minimum thresholds are selected based on one Just Noticeable Difference. Our experimental results show that the proposed per-title encoding results in a 24\% bitrate reduction and 53\% storage reduction compared to the state-of-the-art method.},
  doi       = {10.1109/vcip53242.2021.9675403},
  keywords  = {Image coding, Visual communication, Bit rate, Superresolution, Bandwidth, Streaming media, Spatial resolution, HAS, per-title, deep learning, compression, bitrate ladder},
  url       = {https://ieeexplore.ieee.org/document/9675403},
}

@InProceedings{Nguyen2020,
  author    = {Minh Nguyen and Hadi Amirpour and Christian Timmerer and Hermann Hellwagner},
  booktitle = {Proceedings of the Workshop on the Evolution, Performance, and Interoperability of QUIC},
  title     = {{Scalable High Efficiency Video Coding based HTTP Adaptive Streaming over QUIC}},
  year      = {2020},
  month     = aug,
  pages     = {28--34},
  publisher = {ACM},
  abstract  = {HTTP/2 has been explored widely for adaptive video streaming, but still suffers from Head-of-Line blocking, and three-way handshake delay due to TCP. Meanwhile, QUIC running on top of UDP can tackle these issues. In addition, although many adaptive bitrate (ABR) algorithms have been proposed for scalable and non-scalable video streaming, the literature lacks an algorithm designed for both types of video streaming approaches. In this paper, we investigate the impact of QUIC and HTTP/2 on the performance of ABR algorithms. Moreover, we propose an efficient approach for utilizing scalable video coding formats for adaptive video streaming that combines a traditional video streaming approach (based on non-scalable video coding formats) and a retransmission technique. The experimental results show that QUIC benefits significantly from our proposed method in the context of packet loss and retransmission. Compared to HTTP/2, it improves the average video quality and provides a smoother adaptation behavior. Finally, we demonstrate that our proposed method originally designed for non-scalable video codecs also works efficiently for scalable videos such as Scalable High Efficiency Video Coding (SHVC).},
  doi       = {10.1145/3405796.3405829},
  keywords  = {QUIC, H2BR, HTTP adaptive streaming, Retransmission, SHVC},
  url       = {https://dl.acm.org/doi/10.1145/3405796.3405829},
}

@InProceedings{Ghamsarian2020,
  author    = {Negin Ghamsarian and Hadi Amirpour and Christian Timmerer and Mario Taschwer and Klaus Schöffmann},
  booktitle = {Proceedings of the 28th ACM International Conference on Multimedia},
  title     = {{Relevance-Based Compression of Cataract Surgery Videos Using Convolutional Neural Networks}},
  year      = {2020},
  month     = oct,
  pages     = {3577--3585},
  publisher = {ACM},
  abstract  = {Recorded cataract surgery videos play a prominent role in training and investigating the surgery, and enhancing the surgical outcomes. Due to storage limitations in hospitals, however, the recorded cataract surgeries are deleted after a short time and this precious source of information cannot be fully utilized. Lowering the quality to reduce the required storage space is not advisable since the degraded visual quality results in the loss of relevant information that limits the usage of these videos. To address this problem, we propose a relevance-based compression technique consisting of two modules: (i) relevance detection, which uses neural networks for semantic segmentation and classification of the videos to detect relevant spatio-temporal information, and (ii) content-adaptive compression, which restricts the amount of distortion applied to the relevant content while allocating less bitrate to irrelevant content. The proposed relevance-based compression framework is implemented considering five scenarios based on the definition of relevant information from the target audience's perspective. Experimental results demonstrate the capability of the proposed approach in relevance detection. We further show that the proposed approach can achieve high compression efficiency by abstracting substantial redundant information while retaining the high quality of the relevant content.},
  doi       = {10.1145/3394171.3413658},
  keywords  = {Convolutional Neural Networks, ROI Detection, Video Coding, HEVC, Medical Multimedia},
  url       = {https://dl.acm.org/doi/10.1145/3394171.3413658},
}

@InProceedings{Cetinkaya2020,
  author    = {Ekrem Cetinkaya and Hadi Amirpour and Christian Timmerer and Mohammad Ghanbari},
  booktitle = {2020 IEEE International Conference on Visual Communications and Image Processing (VCIP)},
  title     = {{FaME-ML: Fast Multirate Encoding for HTTP Adaptive Streaming Using Machine Learning}},
  year      = {2020},
  month     = dec,
  pages     = {87--90},
  publisher = {IEEE},
  abstract  = {HTTP Adaptive Streaming (HAS) is the most common approach for delivering video content over the Internet. The requirement to encode the same content at different quality levels (i.e., representations) in HAS is a challenging problem for content providers. Fast multirate encoding approaches try to accelerate this process by reusing information from previously encoded representations. In this paper, we propose to use convolutional neural networks (CNNs) to speed up the encoding of multiple representations with a specific focus on parallel encoding. In parallel encoding, the overall time-complexity is limited to the maximum time-complexity of one of the representations that are encoded in parallel. Therefore, instead of reducing the time-complexity for all representations, the highest time-complexities are reduced. Experimental results show that FaME-ML achieves significant time-complexity savings in parallel encoding scenarios (41\% on average) with a slight increase in bitrate and quality degradation compared to the HEVC reference software.},
  doi       = {10.1109/vcip49819.2020.9301850},
  keywords  = {HEVC, Multirate Encoding, Machine Learning, DASH, HTTP Adaptive Streaming, HAS},
  url       = {https://ieeexplore.ieee.org/abstract/document/9301850},
}

@InProceedings{Amirpour_2020, author = {Hadi Amirpour and Ekrem Cetinkaya and Christian Timmerer and Mohammad Ghanbari}, booktitle = {2020 Data Compression Conference (DCC)}, title = {{Fast Multi-rate Encoding for Adaptive HTTP Streaming}}, year = {2020}, month = {mar}, publisher = {IEEE}, abstract = {Adaptive HTTP streaming is the preferred method to deliver multimedia content in the internet. It provides multiple representations of the same content in different qualities (i.e. bit-rates and resolutions) and allows the client to request segments from the available representations in a dynamic, adaptive way depending on its context. The growing number of representations in adaptive HTTP streaming makes encoding of one video segment at different representations a challenging task in terms of encoding time-complexity. In this paper, information of both highest and lowest quality representations are used to limit Rate Distortion Optimization (RDO) for each Coding Unit Tree (CTU) in High Efficiency Video Coding. Our proposed method first encodes the highest quality representation and consequently uses it to encode the lowest quality representation. In particular, the block structure and the selected reference frame of both highest and lowest quality representations are then used to predict and shorten the RDO process of each CTU for intermediate quality representations. Our proposed method introduces a delay of two CTUs thanks to employing parallel processing techniques. 
Experimental results show significant reduction in time-complexity over the reference software 38% and the state-of-the-art 10% while quality degradation is negligible.}, doi = {10.1109/dcc47342.2020.00080}, keywords = {HTTP adaptive streaming, Multi-rate encoding, HEVC, Fast block partitioning}, url = {https://ieeexplore.ieee.org/document/9105709} } @InProceedings{Amirpour2020, author = {Hadi Amirpour and Christian Timmerer and Mohammad Ghanbari}, booktitle = {2020 IEEE International Conference on Multimedia & Expo Workshops (ICMEW)}, title = {{Towards View-Aware Adaptive Streaming of Holographic Content}}, year = {2020}, month = {jul}, publisher = {IEEE}, abstract = {Holography is able to reconstruct a three-dimensional structure of an object by recording full wave fields of light emitted from the object. This requires a huge amount of data to be encoded, stored, transmitted, and decoded for holographic content, making its practical usage challenging especially for bandwidth-constrained networks and memory-limited devices. In the delivery of holographic content via the internet, bandwidth wastage should be avoided to tackle high bandwidth demands of holography streaming. For real-time applications, encoding time-complexity is also a major problem. In this paper, the concept of dynamic adaptive streaming over HTTP (DASH) is extended to holography image streaming and view-aware adaptation techniques are studied. As each area of a hologram contains information of a specific view, instead of encoding and decoding the entire hologram, just the part required to render the selected view is encoded and transmitted via the network based on the users’ interactivity. Four different strategies, namely, monolithic, single view, adaptive view, and non-real time streaming strategies are explained and compared in terms of bandwidth requirements, encoding time-complexity, and bitrate overhead. 
Experimental results show that the view-aware methods reduce the required bandwidth for holography streaming at the cost of a bitrate increase.}, doi = {10.1109/icmew46912.2020.9106055}, keywords = {Holography, compression, bitrate adaption, dynamic adaptive streaming over HTTP, DASH}, url = {https://ieeexplore.ieee.org/document/9106055} }