core/vidl/vidl_ffmpeg_istream_v2.txx
Go to the documentation of this file.
00001 // This is core/vidl/vidl_ffmpeg_istream_v2.txx
00002 #ifndef vidl_ffmpeg_istream_v2_txx_
00003 #define vidl_ffmpeg_istream_v2_txx_
00004 #include "vidl_ffmpeg_istream.h"
00005 //:
00006 // \file
00007 // \author Matt Leotta
00008 // \author Amitha Perera
00009 // \date   26 Dec 2007
00010 //
00011 // Update implementation based on ffmpeg svn -r11322
00012 // and libswscale svn -r25485.
00013 
00014 //-----------------------------------------------------------------------------
00015 
00016 #include "vidl_ffmpeg_init.h"
00017 #include "vidl_frame.h"
00018 #include "vidl_ffmpeg_convert.h"
00019 
00020 #include <vcl_string.h>
00021 #include <vcl_iostream.h>
00022 
00023 extern "C" {
00024 #if FFMPEG_IN_SEVERAL_DIRECTORIES
00025 #include <libavcodec/avcodec.h>
00026 #include <libavformat/avformat.h>
00027 #include <libswscale/swscale.h>
00028 #else
00029 #include <ffmpeg/avcodec.h>
00030 #include <ffmpeg/avformat.h>
00031 #include <ffmpeg/swscale.h>
00032 #endif
00033 }
00034 
00035 //--------------------------------------------------------------------------------
00036 
00037 struct vidl_ffmpeg_istream::pimpl
00038 {
00039   pimpl()
00040   : fmt_cxt_( NULL ),
00041     vid_index_( -1 ),
00042     vid_str_( NULL ),
00043     last_dts( 0 ),
00044     frame_( NULL ),
00045     num_frames_( -2 ), // sentinel value to indicate not yet computed
00046     sws_context_( NULL ),
00047     cur_frame_( NULL ),
00048     deinterlace_( false ),
00049     frame_number_offset_( 0 )
00050   {
00051     packet_.data = NULL;
00052   }
00053 
00054   AVFormatContext* fmt_cxt_;
00055   int vid_index_;
00056   AVStream* vid_str_;
00057 
00058   //: Decode time of last frame.
00059   int64_t last_dts;
00060 
00061   //: Start time of the stream, to offset the dts when computing the frame number.
00062   int64_t start_time;
00063 
00064   //: The last successfully read frame.
00065   //
00066   // If frame_->data[0] is not NULL, then the frame corresponds to
00067   // the codec state, so that codec.width and so on apply to the
00068   // frame data.
00069   AVFrame* frame_;
00070 
00071 
00072   //: The last successfully read packet (before decoding).
00073   //  This must not be freed if the packet contains the raw image,
00074   //  in which case the frame_ will have only a shallow copy
00075   AVPacket packet_;
00076 
00077   //: number of counted frames
00078   int num_frames_;
00079 
00080   //: A software scaling context
00081   //
00082   // This is the context used for the software scaling and colour
00083   // conversion routines. Since the conversion is likely to be the
00084   // same for each frame, we save the context to avoid re-creating it
00085   // every time.
00086   SwsContext* sws_context_;
00087 
00088   //: A contiguous memory buffer to store the current image data
00089   vil_memory_chunk_sptr contig_memory_;
00090 
00091   //: The last successfully decoded frame.
00092   mutable vidl_frame_sptr cur_frame_;
00093 
00094   //: Apply deinterlacing on the frames?
00095   bool deinterlace_;
00096 
00097   //: Some codec/file format combinations need a frame number offset.
00098   // These codecs have a delay between reading packets and generating frames.
00099   unsigned frame_number_offset_;
00100 };
00101 
00102 
00103 //--------------------------------------------------------------------------------
00104 
00105 //: Constructor
00106 vidl_ffmpeg_istream::
00107 vidl_ffmpeg_istream()
00108   : is_( new vidl_ffmpeg_istream::pimpl )
00109 {
00110   vidl_ffmpeg_init();
00111 }
00112 
00113 
00114 //: Constructor - from a filename
00115 vidl_ffmpeg_istream::
00116 vidl_ffmpeg_istream(const vcl_string& filename)
00117   : is_( new vidl_ffmpeg_istream::pimpl )
00118 {
00119   vidl_ffmpeg_init();
00120   open(filename);
00121 }
00122 
00123 
00124 //: Destructor
00125 vidl_ffmpeg_istream::
00126 ~vidl_ffmpeg_istream()
00127 {
00128   close();
00129   delete is_;
00130 }
00131 
00132 //: Open a new stream using a filename
00133 bool
00134 vidl_ffmpeg_istream::
00135 open(const vcl_string& filename)
00136 {
00137   // Close any currently opened file
00138   close();
00139 
00140   // Open the file
00141   int err;
00142   if ( ( err = av_open_input_file( &is_->fmt_cxt_, filename.c_str(), NULL, 0, NULL ) ) != 0 ) {
00143     return false;
00144   }
00145 
00146   // Get the stream information by reading a bit of the file
00147   if ( av_find_stream_info( is_->fmt_cxt_ ) < 0 ) {
00148     return false;
00149   }
00150 
00151   // Find a video stream. Use the first one we find.
00152   is_->vid_index_ = -1;
00153   for ( unsigned i = 0; i < is_->fmt_cxt_->nb_streams; ++i ) {
00154     AVCodecContext *enc = is_->fmt_cxt_->streams[i]->codec;
00155     if ( enc->codec_type == CODEC_TYPE_VIDEO ) {
00156       is_->vid_index_ = i;
00157       break;
00158     }
00159   }
00160   if ( is_->vid_index_ == -1 ) {
00161     return false;
00162   }
00163 
00164   dump_format( is_->fmt_cxt_, 0, filename.c_str(), 0 );
00165   AVCodecContext *enc = is_->fmt_cxt_->streams[is_->vid_index_]->codec;
00166 
00167   // Open the stream
00168   AVCodec* codec = avcodec_find_decoder(enc->codec_id);
00169   if ( !codec || avcodec_open( enc, codec ) < 0 ) {
00170     return false;
00171   }
00172 
00173   is_->vid_str_ = is_->fmt_cxt_->streams[ is_->vid_index_ ];
00174   is_->frame_ = avcodec_alloc_frame();
00175 
00176   if ( is_->vid_str_->start_time == int64_t(1)<<63 ) {
00177     is_->start_time = 0;
00178   }
00179   else {
00180     is_->start_time = is_->vid_str_->start_time;
00181   }
00182 
00183   // The MPEG 2 codec has a latency of 1 frame when encoded in an AVI
00184   // stream, so the dts of the last packet (stored in last_dts) is
00185   // actually the next frame's dts.
00186   if ( is_->vid_str_->codec->codec_id == CODEC_ID_MPEG2VIDEO &&
00187        vcl_string("avi") == is_->fmt_cxt_->iformat->name ) {
00188     is_->frame_number_offset_ = 1;
00189   }
00190 
00191 
00192   return true;
00193 }
00194 
00195 
00196 //: Close the stream
00197 void
00198 vidl_ffmpeg_istream::
00199 close()
00200 {
00201   if( is_->packet_.data )
00202     av_free_packet( &is_->packet_ );  // free last packet
00203 
00204   if ( is_->frame_ ) {
00205     av_freep( &is_->frame_ );
00206   }
00207 
00208   is_->num_frames_ = -2;
00209   is_->contig_memory_ = 0;
00210   is_->vid_index_ = -1;
00211   if ( is_->vid_str_ ) {
00212     avcodec_close( is_->vid_str_->codec );
00213     is_->vid_str_ = 0;
00214   }
00215   if ( is_->fmt_cxt_ ) {
00216     av_close_input_file( is_->fmt_cxt_ );
00217     is_->fmt_cxt_ = 0;
00218   }
00219 }
00220 
00221 
00222 //: Return true if the stream is open for reading
00223 bool
00224 vidl_ffmpeg_istream::
00225 is_open() const
00226 {
00227   return ! ! is_->frame_;
00228 }
00229 
00230 
00231 //: Return true if the stream is in a valid state
00232 bool
00233 vidl_ffmpeg_istream::
00234 is_valid() const
00235 {
00236   return is_open() && is_->frame_->data[0] != 0;
00237 }
00238 
00239 
00240 //: Return true if the stream support seeking
00241 bool
00242 vidl_ffmpeg_istream::
00243 is_seekable() const
00244 {
00245   return true;
00246 }
00247 
00248 
00249 //: Return the number of frames if known
00250 //  returns -1 for non-seekable streams
00251 int
00252 vidl_ffmpeg_istream::num_frames() const
00253 {
00254   // to get an accurate frame count, quickly run through the entire
00255   // video.  We'll only do this if the user hasn't read any frames,
00256   // because we have no guarantee that we can successfully seek back
00257   // to anywhere but the beginning.  There is logic in advance() to
00258   // ensure this.
00259   vidl_ffmpeg_istream* mutable_this = const_cast<vidl_ffmpeg_istream*>(this);
00260   if ( mutable_this->is_->num_frames_ == -2 ) {
00261     mutable_this->is_->num_frames_ = 0;
00262     while (mutable_this->advance()) {
00263       ++mutable_this->is_->num_frames_;
00264     }
00265     av_seek_frame( mutable_this->is_->fmt_cxt_,
00266                    mutable_this->is_->vid_index_,
00267                    0,
00268                    AVSEEK_FLAG_BACKWARD );
00269   }
00270 
00271   return is_->num_frames_;
00272 }
00273 
00274 
00275 //: Return the current frame number
00276 unsigned int
00277 vidl_ffmpeg_istream::
00278 frame_number() const
00279 {
00280   // Quick return if the stream isn't open.
00281   if ( !is_valid() ) {
00282     return static_cast<unsigned int>(-1);
00283   }
00284 
00285   return ((is_->last_dts - is_->start_time)
00286           * is_->vid_str_->r_frame_rate.num / is_->vid_str_->r_frame_rate.den
00287           * is_->vid_str_->time_base.num + is_->vid_str_->time_base.den/2)
00288          / is_->vid_str_->time_base.den
00289          - int(is_->frame_number_offset_);
00290 }
00291 
00292 
00293 //: Return the width of each frame
00294 unsigned int
00295 vidl_ffmpeg_istream
00296 ::width() const
00297 {
00298   // Quick return if the stream isn't open.
00299   if ( !is_open() ) {
00300     return 0;
00301   }
00302 
00303   return is_->fmt_cxt_->streams[is_->vid_index_]->codec->width;
00304 }
00305 
00306 
00307 //: Return the height of each frame
00308 unsigned int
00309 vidl_ffmpeg_istream
00310 ::height() const
00311 {
00312   // Quick return if the stream isn't open.
00313   if ( !is_open() ) {
00314     return 0;
00315   }
00316 
00317   return is_->fmt_cxt_->streams[is_->vid_index_]->codec->height;
00318 }
00319 
00320 
00321 //: Return the pixel format
00322 vidl_pixel_format
00323 vidl_ffmpeg_istream
00324 ::format() const
00325 {
00326   // Quick return if the stream isn't open.
00327   if ( !is_open() ) {
00328     return VIDL_PIXEL_FORMAT_UNKNOWN;
00329   }
00330 
00331   AVCodecContext* enc = is_->fmt_cxt_->streams[is_->vid_index_]->codec;
00332   vidl_pixel_format fmt = vidl_pixel_format_from_ffmpeg(enc->pix_fmt);
00333   if (fmt == VIDL_PIXEL_FORMAT_UNKNOWN)
00334     return VIDL_PIXEL_FORMAT_RGB_24;
00335   return fmt;
00336 }
00337 
00338 
00339 //: Return the frame rate (0.0 if unspecified)
00340 double
00341 vidl_ffmpeg_istream
00342 ::frame_rate() const
00343 {
00344   // Quick return if the stream isn't open.
00345   if ( !is_open() ) {
00346     return 0.0;
00347   }
00348 
00349   return static_cast<double>(is_->vid_str_->r_frame_rate.num) / is_->vid_str_->r_frame_rate.den;
00350 }
00351 
00352 
00353 //: Return the duration in seconds (0.0 if unknown)
00354 double
00355 vidl_ffmpeg_istream
00356 ::duration() const
00357 {
00358   // Quick return if the stream isn't open.
00359   if ( !is_open() ) {
00360     return 0.0;
00361   }
00362   return static_cast<double>(is_->vid_str_->time_base.num)/is_->vid_str_->time_base.den
00363          * static_cast<double>(is_->vid_str_->duration);
00364 }
00365 
00366 
00367 //: Advance to the next frame (but don't acquire an image)
00368 bool
00369 vidl_ffmpeg_istream::
00370 advance()
00371 {
00372   // Quick return if the file isn't open.
00373   if ( !is_open() ) {
00374     return false;
00375   }
00376 
00377   // See the comment in num_frames().  This is to make sure that once
00378   // we start reading frames, we'll never try to march to the end to
00379   // figure out how many frames there are.
00380   if ( is_->num_frames_ == -2 ) {
00381     is_->num_frames_ = -1;
00382   }
00383 
00384   AVCodecContext* codec = is_->fmt_cxt_->streams[is_->vid_index_]->codec;
00385 
00386   if( is_->packet_.data )
00387     av_free_packet( &is_->packet_ );  // free previous packet
00388 
00389   int got_picture = 0;
00390 
00391   while ( got_picture == 0 ) {
00392     if ( av_read_frame( is_->fmt_cxt_, &is_->packet_ ) < 0 ) {
00393       break;
00394     }
00395     is_->last_dts = is_->packet_.dts;
00396 
00397     // Make sure that the packet is from the actual video stream.
00398     if (is_->packet_.stream_index==is_->vid_index_)
00399     {
00400       if ( avcodec_decode_video( codec,
00401                                  is_->frame_, &got_picture,
00402                                  is_->packet_.data, is_->packet_.size ) < 0 ) {
00403         vcl_cerr << "vidl_ffmpeg_istream: Error decoding packet!\n";
00404         return false;
00405       }
00406       else
00407         break; // without freeing the packet
00408     }
00409     av_free_packet( &is_->packet_ );
00410   }
00411 
00412   // From ffmpeg apiexample.c: some codecs, such as MPEG, transmit the
00413   // I and P frame with a latency of one frame. You must do the
00414   // following to have a chance to get the last frame of the video.
00415   if ( !got_picture ) {
00416     if ( avcodec_decode_video( codec,
00417                                is_->frame_, &got_picture,
00418                                NULL, 0 ) >= 0 ) {
00419       is_->last_dts += int64_t(is_->vid_str_->time_base.den) * is_->vid_str_->r_frame_rate.den
00420         / is_->vid_str_->time_base.num / is_->vid_str_->r_frame_rate.num;
00421     }
00422   }
00423 
00424   // The cached frame is out of date, whether we managed to get a new
00425   // frame or not.
00426   if (is_->cur_frame_)
00427     is_->cur_frame_->invalidate();
00428   is_->cur_frame_ = 0;
00429 
00430   if ( ! got_picture ) {
00431     is_->frame_->data[0] = NULL;
00432   }
00433 
00434   return got_picture != 0;
00435 }
00436 
00437 
00438 //: Read the next frame from the stream
00439 vidl_frame_sptr
00440 vidl_ffmpeg_istream::read_frame()
00441 {
00442   if (advance())
00443     return current_frame();
00444   return NULL;
00445 }
00446 
00447 
00448 //: Return the current frame in the stream
00449 vidl_frame_sptr
00450 vidl_ffmpeg_istream::current_frame()
00451 {
00452   // Quick return if the stream isn't valid
00453   if ( !is_valid() ) {
00454     return NULL;
00455   }
00456   AVCodecContext* enc = is_->fmt_cxt_->streams[is_->vid_index_]->codec;
00457   // If we have not already converted this frame, try to convert it
00458   if ( !is_->cur_frame_ && is_->frame_->data[0] != 0 )
00459   {
00460     int width = enc->width;
00461     int height = enc->height;
00462 
00463     // Deinterlace if requested
00464     if ( is_->deinterlace_ ) {
00465       avpicture_deinterlace( (AVPicture*)is_->frame_, (AVPicture*)is_->frame_,
00466                              enc->pix_fmt, width, height );
00467     }
00468 
00469     // If the pixel format is not recognized by vidl then convert the data into RGB_24
00470     vidl_pixel_format fmt = vidl_pixel_format_from_ffmpeg(enc->pix_fmt);
00471     if (fmt == VIDL_PIXEL_FORMAT_UNKNOWN)
00472     {
00473       int size = width*height*3;
00474       if (!is_->contig_memory_)
00475         is_->contig_memory_ = new vil_memory_chunk(size, VIL_PIXEL_FORMAT_BYTE);
00476       else
00477         is_->contig_memory_->set_size(size, VIL_PIXEL_FORMAT_BYTE);
00478 
00479       // Reuse the previous context if we can.
00480       is_->sws_context_ = sws_getCachedContext(
00481         is_->sws_context_,
00482         width, height, enc->pix_fmt,
00483         width, height, PIX_FMT_RGB24,
00484         SWS_BILINEAR,
00485         NULL, NULL, NULL );
00486 
00487       if ( is_->sws_context_ == NULL ) {
00488         vcl_cerr << "vidl_ffmpeg_istream: couldn't create conversion context\n";
00489         return vidl_frame_sptr();
00490       }
00491 
00492       AVPicture rgb_frame;
00493       avpicture_fill(&rgb_frame, (uint8_t*)is_->contig_memory_->data(), PIX_FMT_RGB24, width, height);
00494 
00495       sws_scale( is_->sws_context_,
00496                  is_->frame_->data, is_->frame_->linesize,
00497                  0, height,
00498                  rgb_frame.data, rgb_frame.linesize );
00499 
00500       is_->cur_frame_ = new vidl_shared_frame(is_->contig_memory_->data(),width,height,
00501                                               VIDL_PIXEL_FORMAT_RGB_24);
00502     }
00503     else
00504     {
00505       // Test for contiguous memory.  Sometimes FFMPEG uses scanline buffers larger
00506       // than the image width.  The extra memory is used in optimized decoding routines.
00507       // This leads to a segmented image buffer, not supported by vidl.
00508       AVPicture test_frame;
00509       avpicture_fill(&test_frame, is_->frame_->data[0], enc->pix_fmt, width, height);
00510       if (test_frame.data[1] == is_->frame_->data[1] &&
00511           test_frame.data[2] == is_->frame_->data[2] &&
00512           test_frame.linesize[0] == is_->frame_->linesize[0] &&
00513           test_frame.linesize[1] == is_->frame_->linesize[1] &&
00514           test_frame.linesize[2] == is_->frame_->linesize[2] )
00515       {
00516         is_->cur_frame_ = new vidl_shared_frame(is_->frame_->data[0], width, height, fmt);
00517       }
00518       // Copy the image into contiguous memory.
00519       else
00520       {
00521         if (!is_->contig_memory_) {
00522           int size = avpicture_get_size( enc->pix_fmt, width, height );
00523           is_->contig_memory_ = new vil_memory_chunk(size, VIL_PIXEL_FORMAT_BYTE);
00524         }
00525         avpicture_fill(&test_frame, (uint8_t*)is_->contig_memory_->data(), enc->pix_fmt, width, height);
00526         av_picture_copy(&test_frame, (AVPicture*)is_->frame_, enc->pix_fmt, width, height);
00527         // use a shared frame because the vil_memory_chunk is reused for each frame
00528         is_->cur_frame_ = new vidl_shared_frame(is_->contig_memory_->data(),width,height,fmt);
00529       }
00530     }
00531   }
00532 
00533   return is_->cur_frame_;
00534 }
00535 
00536 
00537 //: Seek to the given frame number
00538 // \returns true if successful
00539 bool
00540 vidl_ffmpeg_istream::
00541 seek_frame(unsigned int frame)
00542 {
00543   // Quick return if the stream isn't open.
00544   if ( !is_open() ) {
00545     return false;
00546   }
00547 
00548   // We rely on the initial cast to make sure all the operations happen in int64.
00549   int64_t req_timestamp =
00550     int64_t(frame + is_->frame_number_offset_)
00551     * is_->vid_str_->time_base.den
00552     * is_->vid_str_->r_frame_rate.den
00553     / is_->vid_str_->time_base.num
00554     / is_->vid_str_->r_frame_rate.num
00555     + is_->start_time;
00556 
00557   // Seek to a keyframe before the timestamp that we want.
00558   int seek = av_seek_frame( is_->fmt_cxt_, is_->vid_index_, req_timestamp, AVSEEK_FLAG_BACKWARD );
00559 
00560   if ( seek < 0 )
00561     return false;
00562 
00563   avcodec_flush_buffers( is_->vid_str_->codec );
00564 
00565   // We got to a key frame. Forward until we get to the frame we want.
00566   while ( true )
00567   {
00568     if ( ! advance() ) {
00569       return false;
00570     }
00571     if ( is_->last_dts >= req_timestamp ) {
00572       if ( is_->last_dts > req_timestamp ) {
00573         vcl_cerr << "Warning: seek went into the future!\n";
00574         return false;
00575       }
00576       return true;
00577     }
00578   }
00579 }
00580 
00581 #endif // vidl_ffmpeg_istream_v2_txx_