core/vidl/vidl_ffmpeg_istream_v3.txx
Go to the documentation of this file.
00001 // This is core/vidl/vidl_ffmpeg_istream_v3.txx
00002 #ifndef vidl_ffmpeg_istream_v3_txx_
00003 #define vidl_ffmpeg_istream_v3_txx_
00004 #include "vidl_ffmpeg_istream.h"
00005 //:
00006 // \file
00007 // \author Matt Leotta
00008 // \author Amitha Perera
00009 // \date   3 Nov 2011
00010 //
00011 // Update implementation based on
00012 // ffmpeg git hash 139f3ac42dcf24eb8c59af4aaab4e9afdccbc996
00013 
00014 //-----------------------------------------------------------------------------
00015 
00016 #include "vidl_ffmpeg_init.h"
00017 #include "vidl_frame.h"
00018 #include "vidl_ffmpeg_convert.h"
00019 
00020 #include <vcl_string.h>
00021 #include <vcl_iostream.h>
00022 
00023 extern "C" {
00024 #if FFMPEG_IN_SEVERAL_DIRECTORIES
00025 #include <libavcodec/avcodec.h>
00026 #include <libavformat/avformat.h>
00027 #include <libswscale/swscale.h>
00028 #else
00029 #include <ffmpeg/avcodec.h>
00030 #include <ffmpeg/avformat.h>
00031 #include <ffmpeg/swscale.h>
00032 #endif
00033 }
00034 
00035 //--------------------------------------------------------------------------------
00036 
00037 struct vidl_ffmpeg_istream::pimpl
00038 {
00039   pimpl()
00040   : fmt_cxt_( NULL ),
00041     vid_index_( -1 ),
00042     vid_str_( NULL ),
00043     last_dts( 0 ),
00044     frame_( NULL ),
00045     num_frames_( -2 ), // sentinel value to indicate not yet computed
00046     sws_context_( NULL ),
00047     cur_frame_( NULL ),
00048     deinterlace_( false ),
00049     frame_number_offset_( 0 )
00050   {
00051     packet_.data = NULL;
00052   }
00053 
00054   AVFormatContext* fmt_cxt_;
00055   int vid_index_;
00056   AVStream* vid_str_;
00057 
00058   //: Decode time of last frame.
00059   int64_t last_dts;
00060 
00061   //: Start time of the stream, to offset the dts when computing the frame number.
00062   int64_t start_time;
00063 
00064   //: The last successfully read frame.
00065   //
00066   // If frame_->data[0] is not NULL, then the frame corresponds to
00067   // the codec state, so that codec.width and so on apply to the
00068   // frame data.
00069   AVFrame* frame_;
00070 
00071   //: The last successfully read packet (before decoding).
00072   //  This must not be freed if the packet contains the raw image,
00073   //  in which case the frame_ will have only a shallow copy
00074   AVPacket packet_;
00075 
00076   //: number of counted frames
00077   int num_frames_;
00078 
00079   //: A software scaling context
00080   //
00081   // This is the context used for the software scaling and colour
00082   // conversion routines. Since the conversion is likely to be the
00083   // same for each frame, we save the context to avoid re-creating it
00084   // every time.
00085   SwsContext* sws_context_;
00086 
00087   //: A contiguous memory buffer to store the current image data
00088   vil_memory_chunk_sptr contig_memory_;
00089 
00090   //: The last successfully decoded frame.
00091   mutable vidl_frame_sptr cur_frame_;
00092 
00093   //: Apply deinterlacing on the frames?
00094   bool deinterlace_;
00095 
00096   //: Some codec/file format combinations need a frame number offset.
00097   // These codecs have a delay between reading packets and generating frames.
00098   unsigned frame_number_offset_;
00099 };
00100 
00101 
00102 //--------------------------------------------------------------------------------
00103 
00104 //: Constructor
00105 vidl_ffmpeg_istream::
00106 vidl_ffmpeg_istream()
00107   : is_( new vidl_ffmpeg_istream::pimpl )
00108 {
00109   vidl_ffmpeg_init();
00110 }
00111 
00112 
00113 //: Constructor - from a filename
00114 vidl_ffmpeg_istream::
00115 vidl_ffmpeg_istream(const vcl_string& filename)
00116   : is_( new vidl_ffmpeg_istream::pimpl )
00117 {
00118   vidl_ffmpeg_init();
00119   open(filename);
00120 }
00121 
00122 
00123 //: Destructor
00124 vidl_ffmpeg_istream::
00125 ~vidl_ffmpeg_istream()
00126 {
00127   close();
00128   delete is_;
00129 }
00130 
00131 //: Open a new stream using a filename
00132 bool
00133 vidl_ffmpeg_istream::
00134 open(const vcl_string& filename)
00135 {
00136   // Close any currently opened file
00137   close();
00138 
00139   // Open the file
00140   int err;
00141   if ( ( err = avformat_open_input( &is_->fmt_cxt_, filename.c_str(), NULL, NULL ) ) != 0 ) {
00142     return false;
00143   }
00144 
00145   // Get the stream information by reading a bit of the file
00146   if ( av_find_stream_info( is_->fmt_cxt_ ) < 0 ) {
00147     return false;
00148   }
00149 
00150   // Find a video stream. Use the first one we find.
00151   is_->vid_index_ = -1;
00152   for ( unsigned i = 0; i < is_->fmt_cxt_->nb_streams; ++i ) {
00153     AVCodecContext *enc = is_->fmt_cxt_->streams[i]->codec;
00154     if ( enc->codec_type == AVMEDIA_TYPE_VIDEO ) {
00155       is_->vid_index_ = i;
00156       break;
00157     }
00158   }
00159   if ( is_->vid_index_ == -1 ) {
00160     return false;
00161   }
00162 
00163   av_dump_format( is_->fmt_cxt_, 0, filename.c_str(), 0 );
00164   AVCodecContext *enc = is_->fmt_cxt_->streams[is_->vid_index_]->codec;
00165 
00166   // Open the stream
00167   AVCodec* codec = avcodec_find_decoder(enc->codec_id);
00168   if ( !codec || avcodec_open( enc, codec ) < 0 ) {
00169     return false;
00170   }
00171 
00172   is_->vid_str_ = is_->fmt_cxt_->streams[ is_->vid_index_ ];
00173   is_->frame_ = avcodec_alloc_frame();
00174 
00175   if ( is_->vid_str_->start_time == int64_t(1)<<63 ) {
00176     is_->start_time = 0;
00177   }
00178   else {
00179     is_->start_time = is_->vid_str_->start_time;
00180   }
00181 
00182   // The MPEG 2 codec has a latency of 1 frame when encoded in an AVI
00183   // stream, so the dts of the last packet (stored in last_dts) is
00184   // actually the next frame's dts.
00185   if ( is_->vid_str_->codec->codec_id == CODEC_ID_MPEG2VIDEO &&
00186        vcl_string("avi") == is_->fmt_cxt_->iformat->name ) {
00187     is_->frame_number_offset_ = 1;
00188   }
00189 
00190   // Not sure if this does anything, but no harm either
00191   av_init_packet(&is_->packet_);
00192   is_->packet_.data = 0;
00193   is_->packet_.size = 0;
00194 
00195   return true;
00196 }
00197 
00198 
00199 //: Close the stream
00200 void
00201 vidl_ffmpeg_istream::
00202 close()
00203 {
00204   if( is_->packet_.data )
00205     av_free_packet( &is_->packet_ );  // free last packet
00206 
00207   if ( is_->frame_ ) {
00208     av_freep( &is_->frame_ );
00209   }
00210 
00211   is_->num_frames_ = -2;
00212   is_->contig_memory_ = 0;
00213   is_->vid_index_ = -1;
00214   if ( is_->vid_str_ ) {
00215     avcodec_close( is_->vid_str_->codec );
00216     is_->vid_str_ = 0;
00217   }
00218   if ( is_->fmt_cxt_ ) {
00219     av_close_input_file( is_->fmt_cxt_ );
00220     is_->fmt_cxt_ = 0;
00221   }
00222 }
00223 
00224 
00225 //: Return true if the stream is open for reading
00226 bool
00227 vidl_ffmpeg_istream::
00228 is_open() const
00229 {
00230   return ! ! is_->frame_;
00231 }
00232 
00233 
00234 //: Return true if the stream is in a valid state
00235 bool
00236 vidl_ffmpeg_istream::
00237 is_valid() const
00238 {
00239   return is_open() && is_->frame_->data[0] != 0;
00240 }
00241 
00242 
00243 //: Return true if the stream support seeking
00244 bool
00245 vidl_ffmpeg_istream::
00246 is_seekable() const
00247 {
00248   return true;
00249 }
00250 
00251 
00252 //: Return the number of frames if known
00253 //  returns -1 for non-seekable streams
00254 int
00255 vidl_ffmpeg_istream::num_frames() const
00256 {
00257   // to get an accurate frame count, quickly run through the entire
00258   // video.  We'll only do this if the user hasn't read any frames,
00259   // because we have no guarantee that we can successfully seek back
00260   // to anywhere but the beginning.  There is logic in advance() to
00261   // ensure this.
00262   vidl_ffmpeg_istream* mutable_this = const_cast<vidl_ffmpeg_istream*>(this);
00263   if ( mutable_this->is_->num_frames_ == -2 ) {
00264     mutable_this->is_->num_frames_ = 0;
00265     while (mutable_this->advance()) {
00266       ++mutable_this->is_->num_frames_;
00267     }
00268     av_seek_frame( mutable_this->is_->fmt_cxt_,
00269                    mutable_this->is_->vid_index_,
00270                    0,
00271                    AVSEEK_FLAG_BACKWARD );
00272   }
00273 
00274   return is_->num_frames_;
00275 }
00276 
00277 
00278 //: Return the current frame number
00279 unsigned int
00280 vidl_ffmpeg_istream::
00281 frame_number() const
00282 {
00283   // Quick return if the stream isn't open.
00284   if ( !is_valid() ) {
00285     return static_cast<unsigned int>(-1);
00286   }
00287 
00288   return (unsigned int)( ((is_->last_dts - is_->start_time)
00289                           * is_->vid_str_->r_frame_rate.num / is_->vid_str_->r_frame_rate.den
00290                           * is_->vid_str_->time_base.num + is_->vid_str_->time_base.den/2)
00291                            / is_->vid_str_->time_base.den
00292                            - int(is_->frame_number_offset_) );
00293 }
00294 
00295 
00296 //: Return the width of each frame
00297 unsigned int
00298 vidl_ffmpeg_istream
00299 ::width() const
00300 {
00301   // Quick return if the stream isn't open.
00302   if ( !is_open() ) {
00303     return 0;
00304   }
00305 
00306   return is_->fmt_cxt_->streams[is_->vid_index_]->codec->width;
00307 }
00308 
00309 
00310 //: Return the height of each frame
00311 unsigned int
00312 vidl_ffmpeg_istream
00313 ::height() const
00314 {
00315   // Quick return if the stream isn't open.
00316   if ( !is_open() ) {
00317     return 0;
00318   }
00319 
00320   return is_->fmt_cxt_->streams[is_->vid_index_]->codec->height;
00321 }
00322 
00323 
00324 //: Return the pixel format
00325 vidl_pixel_format
00326 vidl_ffmpeg_istream
00327 ::format() const
00328 {
00329   // Quick return if the stream isn't open.
00330   if ( !is_open() ) {
00331     return VIDL_PIXEL_FORMAT_UNKNOWN;
00332   }
00333 
00334   AVCodecContext* enc = is_->fmt_cxt_->streams[is_->vid_index_]->codec;
00335   vidl_pixel_format fmt = vidl_pixel_format_from_ffmpeg(enc->pix_fmt);
00336   if (fmt == VIDL_PIXEL_FORMAT_UNKNOWN)
00337     return VIDL_PIXEL_FORMAT_RGB_24;
00338   return fmt;
00339 }
00340 
00341 
00342 //: Return the frame rate (0.0 if unspecified)
00343 double
00344 vidl_ffmpeg_istream
00345 ::frame_rate() const
00346 {
00347   // Quick return if the stream isn't open.
00348   if ( !is_open() ) {
00349     return 0.0;
00350   }
00351 
00352   return static_cast<double>(is_->vid_str_->r_frame_rate.num) / is_->vid_str_->r_frame_rate.den;
00353 }
00354 
00355 
00356 //: Return the duration in seconds (0.0 if unknown)
00357 double
00358 vidl_ffmpeg_istream
00359 ::duration() const
00360 {
00361   // Quick return if the stream isn't open.
00362   if ( !is_open() ) {
00363     return 0.0;
00364   }
00365   return static_cast<double>(is_->vid_str_->time_base.num)/is_->vid_str_->time_base.den
00366          * static_cast<double>(is_->vid_str_->duration);
00367 }
00368 
00369 
00370 //: Advance to the next frame (but don't acquire an image)
00371 bool
00372 vidl_ffmpeg_istream::
00373 advance()
00374 {
00375   // Quick return if the file isn't open.
00376   if ( !is_open() ) {
00377     return false;
00378   }
00379 
00380   // See the comment in num_frames().  This is to make sure that once
00381   // we start reading frames, we'll never try to march to the end to
00382   // figure out how many frames there are.
00383   if ( is_->num_frames_ == -2 ) {
00384     is_->num_frames_ = -1;
00385   }
00386 
00387   AVCodecContext* codec = is_->fmt_cxt_->streams[is_->vid_index_]->codec;
00388 
00389   if( is_->packet_.data )
00390     av_free_packet( &is_->packet_ );  // free previous packet
00391 
00392   int got_picture = 0;
00393 
00394   while ( got_picture == 0 ) {
00395     if ( av_read_frame( is_->fmt_cxt_, &is_->packet_ ) < 0 ) {
00396       break;
00397     }
00398     is_->last_dts = is_->packet_.dts;
00399 
00400     // Make sure that the packet is from the actual video stream.
00401     if (is_->packet_.stream_index==is_->vid_index_)
00402     {
00403       if ( avcodec_decode_video2( codec,
00404                                   is_->frame_, &got_picture,
00405                                   &is_->packet_ ) < 0 ) {
00406         vcl_cerr << "vidl_ffmpeg_istream: Error decoding packet!\n";
00407         return false;
00408       }
00409       else
00410         break; // without freeing the packet
00411     }
00412     av_free_packet( &is_->packet_ );
00413   }
00414 
00415   // From ffmpeg apiexample.c: some codecs, such as MPEG, transmit the
00416   // I and P frame with a latency of one frame. You must do the
00417   // following to have a chance to get the last frame of the video.
00418   if ( !got_picture ) {
00419     av_init_packet(&is_->packet_);
00420     is_->packet_.data = NULL;
00421     is_->packet_.size = 0;
00422     if ( avcodec_decode_video2( codec,
00423                                 is_->frame_, &got_picture,
00424                                 &is_->packet_ ) >= 0 ) {
00425       is_->last_dts += int64_t(is_->vid_str_->time_base.den) * is_->vid_str_->r_frame_rate.den
00426         / is_->vid_str_->time_base.num / is_->vid_str_->r_frame_rate.num;
00427     }
00428   }
00429 
00430   // The cached frame is out of date, whether we managed to get a new
00431   // frame or not.
00432   if (is_->cur_frame_)
00433     is_->cur_frame_->invalidate();
00434   is_->cur_frame_ = 0;
00435 
00436   if ( ! got_picture ) {
00437     is_->frame_->data[0] = NULL;
00438   }
00439 
00440   return got_picture != 0;
00441 }
00442 
00443 
00444 //: Read the next frame from the stream
00445 vidl_frame_sptr
00446 vidl_ffmpeg_istream::read_frame()
00447 {
00448   if (advance())
00449     return current_frame();
00450   return NULL;
00451 }
00452 
00453 
00454 //: Return the current frame in the stream
00455 vidl_frame_sptr
00456 vidl_ffmpeg_istream::current_frame()
00457 {
00458   // Quick return if the stream isn't valid
00459   if ( !is_valid() ) {
00460     return NULL;
00461   }
00462   AVCodecContext* enc = is_->fmt_cxt_->streams[is_->vid_index_]->codec;
00463   // If we have not already converted this frame, try to convert it
00464   if ( !is_->cur_frame_ && is_->frame_->data[0] != 0 )
00465   {
00466     int width = enc->width;
00467     int height = enc->height;
00468 
00469     // Deinterlace if requested
00470     if ( is_->deinterlace_ ) {
00471       avpicture_deinterlace( (AVPicture*)is_->frame_, (AVPicture*)is_->frame_,
00472                              enc->pix_fmt, width, height );
00473     }
00474 
00475     // If the pixel format is not recognized by vidl then convert the data into RGB_24
00476     vidl_pixel_format fmt = vidl_pixel_format_from_ffmpeg(enc->pix_fmt);
00477     if (fmt == VIDL_PIXEL_FORMAT_UNKNOWN)
00478     {
00479       int size = width*height*3;
00480       if (!is_->contig_memory_)
00481         is_->contig_memory_ = new vil_memory_chunk(size, VIL_PIXEL_FORMAT_BYTE);
00482       else
00483         is_->contig_memory_->set_size(size, VIL_PIXEL_FORMAT_BYTE);
00484 
00485       // Reuse the previous context if we can.
00486       is_->sws_context_ = sws_getCachedContext(
00487         is_->sws_context_,
00488         width, height, enc->pix_fmt,
00489         width, height, PIX_FMT_RGB24,
00490         SWS_BILINEAR,
00491         NULL, NULL, NULL );
00492 
00493       if ( is_->sws_context_ == NULL ) {
00494         vcl_cerr << "vidl_ffmpeg_istream: couldn't create conversion context\n";
00495         return vidl_frame_sptr();
00496       }
00497 
00498       AVPicture rgb_frame;
00499       avpicture_fill(&rgb_frame, (uint8_t*)is_->contig_memory_->data(), PIX_FMT_RGB24, width, height);
00500 
00501       sws_scale( is_->sws_context_,
00502                  is_->frame_->data, is_->frame_->linesize,
00503                  0, height,
00504                  rgb_frame.data, rgb_frame.linesize );
00505 
00506       is_->cur_frame_ = new vidl_shared_frame(is_->contig_memory_->data(),width,height,
00507                                               VIDL_PIXEL_FORMAT_RGB_24);
00508     }
00509     else
00510     {
00511       // Test for contiguous memory.  Sometimes FFMPEG uses scanline buffers larger
00512       // than the image width.  The extra memory is used in optimized decoding routines.
00513       // This leads to a segmented image buffer, not supported by vidl.
00514       AVPicture test_frame;
00515       avpicture_fill(&test_frame, is_->frame_->data[0], enc->pix_fmt, width, height);
00516       if (test_frame.data[1] == is_->frame_->data[1] &&
00517           test_frame.data[2] == is_->frame_->data[2] &&
00518           test_frame.linesize[0] == is_->frame_->linesize[0] &&
00519           test_frame.linesize[1] == is_->frame_->linesize[1] &&
00520           test_frame.linesize[2] == is_->frame_->linesize[2] )
00521       {
00522         is_->cur_frame_ = new vidl_shared_frame(is_->frame_->data[0], width, height, fmt);
00523       }
00524       // Copy the image into contiguous memory.
00525       else
00526       {
00527         if (!is_->contig_memory_) {
00528           int size = avpicture_get_size( enc->pix_fmt, width, height );
00529           is_->contig_memory_ = new vil_memory_chunk(size, VIL_PIXEL_FORMAT_BYTE);
00530         }
00531         avpicture_fill(&test_frame, (uint8_t*)is_->contig_memory_->data(), enc->pix_fmt, width, height);
00532         av_picture_copy(&test_frame, (AVPicture*)is_->frame_, enc->pix_fmt, width, height);
00533         // use a shared frame because the vil_memory_chunk is reused for each frame
00534         is_->cur_frame_ = new vidl_shared_frame(is_->contig_memory_->data(),width,height,fmt);
00535       }
00536     }
00537   }
00538 
00539   return is_->cur_frame_;
00540 }
00541 
00542 
00543 //: Seek to the given frame number
00544 // \returns true if successful
00545 bool
00546 vidl_ffmpeg_istream::
00547 seek_frame(unsigned int frame)
00548 {
00549   // Quick return if the stream isn't open.
00550   if ( !is_open() ) {
00551     return false;
00552   }
00553 
00554   // We rely on the initial cast to make sure all the operations happen in int64.
00555   int64_t req_timestamp =
00556     int64_t(frame + is_->frame_number_offset_)
00557     * is_->vid_str_->time_base.den
00558     * is_->vid_str_->r_frame_rate.den
00559     / is_->vid_str_->time_base.num
00560     / is_->vid_str_->r_frame_rate.num
00561     + is_->start_time;
00562 
00563   // Seek to a keyframe before the timestamp that we want.
00564   int seek = av_seek_frame( is_->fmt_cxt_, is_->vid_index_, req_timestamp, AVSEEK_FLAG_BACKWARD );
00565 
00566   if ( seek < 0 )
00567     return false;
00568 
00569   avcodec_flush_buffers( is_->vid_str_->codec );
00570 
00571   // We got to a key frame. Forward until we get to the frame we want.
00572   while ( true )
00573   {
00574     if ( ! advance() ) {
00575       return false;
00576     }
00577     if ( is_->last_dts >= req_timestamp ) {
00578       if ( is_->last_dts > req_timestamp ) {
00579         vcl_cerr << "Warning: seek went into the future!\n";
00580         return false;
00581       }
00582       return true;
00583     }
00584   }
00585 }
00586 
00587 #endif // vidl_ffmpeg_istream_v3_txx_