core/vidl/vidl_ffmpeg_istream_v1.txx
Go to the documentation of this file.
00001 // This is core/vidl/vidl_ffmpeg_istream_v1.txx
00002 #ifndef vidl_ffmpeg_istream_v1_txx_
00003 #define vidl_ffmpeg_istream_v1_txx_
00004 #include "vidl_ffmpeg_istream.h"
00005 //:
00006 // \file
00007 // \author Matt Leotta
00008 // \date   21 Dec 2005
00009 //
00010 //-----------------------------------------------------------------------------
00011 
00012 #include "vidl_ffmpeg_init.h"
00013 #include "vidl_frame.h"
00014 #include "vidl_ffmpeg_convert.h"
00015 
00016 #include <vcl_cstring.h>
00017 #include <vcl_string.h>
00018 #include <vcl_iostream.h>
00019 
00020 extern "C" {
00021 #if FFMPEG_IN_SEVERAL_DIRECTORIES
00022 #include <libavcodec/avcodec.h>
00023 #include <libavformat/avformat.h>
00024 #else
00025 #include <ffmpeg/avcodec.h>
00026 #include <ffmpeg/avformat.h>
00027 #endif
00028 }
00029 
00030 //--------------------------------------------------------------------------------
00031 
00032 struct vidl_ffmpeg_istream::pimpl
00033 {
00034   pimpl()
00035   : fmt_cxt_( NULL ),
00036   vid_index_( -1 ),
00037   vid_str_( NULL ),
00038   last_dts( 0 ),
00039   frame_( NULL ),
00040   num_frames_( -2 ), // sentinel value to indicate not yet computed
00041   cur_frame_( NULL ),
00042   deinterlace_( false ),
00043   frame_number_offset_( 0 )
00044   {
00045   }
00046 
00047   AVFormatContext* fmt_cxt_;
00048   int vid_index_;
00049   AVStream* vid_str_;
00050 
00051   //: Decode time of last frame.
00052   int64_t last_dts;
00053 
00054   //: The last successfully read frame.
00055   // If frame_->data[0] is not NULL, then the frame corresponds to
00056   // the codec state, so that codec.width and so on apply to the
00057   // frame data.
00058   AVFrame* frame_;
00059 
00060   //: number of counted frames
00061   int num_frames_;
00062 
00063   //: A contiguous memory buffer to store the current image data
00064   vil_memory_chunk_sptr contig_memory_;
00065 
00066   //: A contiguous memory buffer to frame data for raw video, because there is no decoder to hold it for us.
00067   vil_memory_chunk_sptr raw_video_memory_;
00068 
00069   //: The last successfully decoded frame.
00070   mutable vidl_frame_sptr cur_frame_;
00071 
00072   //: Apply deinterlacing on the frames?
00073   bool deinterlace_;
00074 
00075   //: Some codec/file format combinations need a frame number offset.
00076   // These codecs have a delay between reading packets and generating frames.
00077   unsigned frame_number_offset_;
00078 };
00079 
00080 
00081 //--------------------------------------------------------------------------------
00082 
00083 //: Constructor
00084 vidl_ffmpeg_istream::
00085 vidl_ffmpeg_istream()
00086   : is_( new vidl_ffmpeg_istream::pimpl )
00087 {
00088   vidl_ffmpeg_init();
00089 }
00090 
00091 
00092 //: Constructor - from a filename
00093 vidl_ffmpeg_istream::
00094 vidl_ffmpeg_istream(const vcl_string& filename)
00095   : is_( new vidl_ffmpeg_istream::pimpl )
00096 {
00097   vidl_ffmpeg_init();
00098   open(filename);
00099 }
00100 
00101 
00102 //: Destructor
00103 vidl_ffmpeg_istream::
00104 ~vidl_ffmpeg_istream()
00105 {
00106   close();
00107   delete is_;
00108 }
00109 
00110 //: Open a new stream using a filename
00111 bool
00112 vidl_ffmpeg_istream::
00113 open(const vcl_string& filename)
00114 {
00115   // Close any currently opened file
00116   close();
00117 
00118   // Open the file
00119   int err;
00120   if ( ( err = av_open_input_file( &is_->fmt_cxt_, filename.c_str(), NULL, 0, NULL ) ) != 0 ) {
00121     return false;
00122   }
00123 
00124   // Get the stream information by reading a bit of the file
00125   if ( av_find_stream_info( is_->fmt_cxt_ ) < 0 ) {
00126     return false;
00127   }
00128 
00129   // Find a video stream. Use the first one we find.
00130   is_->vid_index_ = -1;
00131   for ( unsigned int i = 0; i < is_->fmt_cxt_->nb_streams; ++i ) {
00132 #if LIBAVFORMAT_BUILD <= 4628
00133     AVCodecContext *enc = &is_->fmt_cxt_->streams[i]->codec;
00134 #else
00135     AVCodecContext *enc = is_->fmt_cxt_->streams[i]->codec;
00136 #endif
00137     if ( enc->codec_type == CODEC_TYPE_VIDEO ) {
00138   is_->vid_index_ = i;
00139   break;
00140     }
00141   }
00142   if ( is_->vid_index_ == -1 ) {
00143     return false;
00144   }
00145 
00146   dump_format( is_->fmt_cxt_, 0, filename.c_str(), 0 );
00147 #if LIBAVFORMAT_BUILD <= 4628
00148   AVCodecContext *enc = &is_->fmt_cxt_->streams[is_->vid_index_]->codec;
00149 #else
00150   AVCodecContext *enc = is_->fmt_cxt_->streams[is_->vid_index_]->codec;
00151 #endif
00152   // Open the stream
00153   AVCodec* codec = avcodec_find_decoder(enc->codec_id);
00154   if ( !codec || avcodec_open( enc, codec ) < 0 ) {
00155     return false;
00156   }
00157 
00158 #if LIBAVFORMAT_BUILD <= 4623
00159   if (enc->frame_rate>1000 && enc->frame_rate_base==1)
00160     enc->frame_rate_base=1000;
00161 #endif
00162 
00163   is_->vid_str_ = is_->fmt_cxt_->streams[ is_->vid_index_ ];
00164   is_->frame_ = avcodec_alloc_frame();
00165 
00166   // newer releases of ffmpeg may require a 4th argument to av_seek_frame
00167 #if LIBAVFORMAT_BUILD <= 4616
00168   av_seek_frame( is_->fmt_cxt_, is_->vid_index_, 0 );
00169 #else
00170   av_seek_frame( is_->fmt_cxt_, is_->vid_index_, 0, AVSEEK_FLAG_BACKWARD );
00171 #endif
00172 
00173   return true;
00174 }
00175 
00176 
00177 //: Close the stream
00178 void
00179 vidl_ffmpeg_istream::
00180 close()
00181 {
00182   if ( is_->frame_ ) {
00183     av_free( is_->frame_ );
00184     is_->frame_ = 0;
00185   }
00186 
00187   is_->num_frames_ = -2;
00188   is_->contig_memory_ = 0;
00189   is_->vid_index_ = -1;
00190   if ( is_->vid_str_ ) {
00191 #if LIBAVFORMAT_BUILD <= 4628
00192     avcodec_close( &is_->vid_str_->codec );
00193 #else
00194     avcodec_close( is_->vid_str_->codec );
00195 #endif
00196     is_->vid_str_ = 0;
00197   }
00198   if ( is_->fmt_cxt_ ) {
00199     av_close_input_file( is_->fmt_cxt_ );
00200     is_->fmt_cxt_ = 0;
00201   }
00202 }
00203 
00204 
00205 //: Return true if the stream is open for reading
00206 bool
00207 vidl_ffmpeg_istream::
00208 is_open() const
00209 {
00210   return is_->frame_;
00211 }
00212 
00213 
00214 //: Return true if the stream is in a valid state
00215 bool
00216 vidl_ffmpeg_istream::
00217 is_valid() const
00218 {
00219   return is_open() && is_->frame_->data[0] != 0;
00220 }
00221 
00222 
00223 //: Return true if the stream support seeking
00224 bool
00225 vidl_ffmpeg_istream::
00226 is_seekable() const
00227 {
00228   return true;
00229 }
00230 
00231 
00232 //: Return the number of frames if known
00233 // \returns -1 for non-seekable streams
00234 int
00235 vidl_ffmpeg_istream::num_frames() const
00236 {
00237   // to get an accurate frame count, quickly run through the entire
00238   // video.  We'll only do this if the user hasn't read any frames,
00239   // because we have no guarantee that we can successfully seek back
00240   // to anywhere but the beginning.  There is logic in advance() to
00241   // ensure this.
00242   vidl_ffmpeg_istream* mutable_this = const_cast<vidl_ffmpeg_istream*>(this);
00243   if ( mutable_this->is_->num_frames_ == -2 ) {
00244     mutable_this->is_->num_frames_ = 0;
00245     while (mutable_this->advance()) {
00246       ++mutable_this->is_->num_frames_;
00247     }
00248 #if LIBAVFORMAT_BUILD <= 4616
00249     av_seek_frame( mutable_this->is_->fmt_cxt_,
00250                    mutable_this->is_->vid_index_,
00251                    0 );
00252 #else
00253     av_seek_frame( mutable_this->is_->fmt_cxt_,
00254                    mutable_this->is_->vid_index_,
00255                    0,
00256                    AVSEEK_FLAG_BACKWARD );
00257 #endif
00258   }
00259 
00260   return is_->num_frames_;
00261 }
00262 
00263 
00264 //: Return the current frame number
00265 unsigned int
00266 vidl_ffmpeg_istream::
00267 frame_number() const
00268 {
00269   // Quick return if the stream isn't open.
00270   if ( !is_valid() ) {
00271     return static_cast<unsigned int>(-1);
00272   }
00273 
00274   return ((is_->last_dts - is_->vid_str_->start_time)
00275 #if LIBAVFORMAT_BUILD <= 4623
00276           * is_->vid_str_->r_frame_rate / is_->vid_str_->r_frame_rate_base
00277           + AV_TIME_BASE/2) / AV_TIME_BASE
00278 #else
00279           * is_->vid_str_->r_frame_rate.num / is_->vid_str_->r_frame_rate.den
00280           * is_->vid_str_->time_base.num + is_->vid_str_->time_base.den/2)
00281          / is_->vid_str_->time_base.den
00282 #endif
00283       - is_->frame_number_offset_;
00284 }
00285 
00286 
00287 //: Return the width of each frame
00288 unsigned int
00289 vidl_ffmpeg_istream
00290 ::width() const
00291 {
00292   // Quick return if the stream isn't open.
00293   if ( !is_open() ) {
00294     return 0;
00295   }
00296 #if LIBAVFORMAT_BUILD <= 4628
00297   AVCodecContext* enc = &is_->fmt_cxt_->streams[is_->vid_index_]->codec;
00298 #else
00299   AVCodecContext* enc = is_->fmt_cxt_->streams[is_->vid_index_]->codec;
00300 #endif
00301   return enc->width;
00302 }
00303 
00304 
00305 //: Return the height of each frame
00306 unsigned int
00307 vidl_ffmpeg_istream
00308 ::height() const
00309 {
00310   // Quick return if the stream isn't open.
00311   if ( !is_open() ) {
00312     return 0;
00313   }
00314 #if LIBAVFORMAT_BUILD <= 4628
00315   AVCodecContext* enc = &is_->fmt_cxt_->streams[is_->vid_index_]->codec;
00316 #else
00317   AVCodecContext* enc = is_->fmt_cxt_->streams[is_->vid_index_]->codec;
00318 #endif
00319   return enc->height;
00320 }
00321 
00322 
00323 //: Return the pixel format
00324 vidl_pixel_format
00325 vidl_ffmpeg_istream
00326 ::format() const
00327 {
00328   // Quick return if the stream isn't open.
00329   if ( !is_open() ) {
00330     return VIDL_PIXEL_FORMAT_UNKNOWN;
00331   }
00332 #if LIBAVFORMAT_BUILD <= 4628
00333   AVCodecContext* enc = &is_->fmt_cxt_->streams[is_->vid_index_]->codec;
00334 #else
00335   AVCodecContext* enc = is_->fmt_cxt_->streams[is_->vid_index_]->codec;
00336 #endif
00337   vidl_pixel_format fmt = vidl_pixel_format_from_ffmpeg(enc->pix_fmt);
00338   if (fmt == VIDL_PIXEL_FORMAT_UNKNOWN)
00339     return VIDL_PIXEL_FORMAT_RGB_24;
00340   return fmt;
00341 }
00342 
00343 
00344 //: Return the frame rate (0.0 if unspecified)
00345 double
00346 vidl_ffmpeg_istream
00347 ::frame_rate() const
00348 {
00349   // Quick return if the stream isn't open.
00350   if ( !is_open() ) {
00351     return 0.0;
00352   }
00353 #if LIBAVFORMAT_BUILD <= 4623
00354   return static_cast<double>(is_->vid_str_->r_frame_rate)
00355          / is_->vid_str_->r_frame_rate_base
00356          / AV_TIME_BASE;
00357 #else
00358   return static_cast<double>(is_->vid_str_->r_frame_rate.num) / is_->vid_str_->r_frame_rate.den;
00359 #endif
00360 }
00361 
00362 
00363 //: Return the duration in seconds (0.0 if unknown)
00364 double
00365 vidl_ffmpeg_istream
00366 ::duration() const
00367 {
00368   // Quick return if the stream isn't open.
00369   if ( !is_open() ) {
00370     return 0.0;
00371   }
00372 #if LIBAVFORMAT_BUILD <= 4623
00373   return static_cast<double>(is_->vid_str_->duration)
00374          / AV_TIME_BASE;
00375 #else
00376   return static_cast<double>(is_->vid_str_->time_base.num)/is_->vid_str_->time_base.den
00377          * static_cast<double>(is_->vid_str_->duration);
00378 #endif
00379 }
00380 
00381 
00382 //: Advance to the next frame (but don't acquire an image)
00383 bool
00384 vidl_ffmpeg_istream::
00385 advance()
00386 {
00387   // Quick return if the file isn't open.
00388   if ( !is_open() ) {
00389     return false;
00390   }
00391 
00392   // See the comment in num_frames().  This is to make sure that once
00393   // we start reading frames, we'll never try to march to the end to
00394   // figure out how many frames there are.
00395   if ( is_->num_frames_ == -2 ) {
00396     is_->num_frames_ = -1;
00397   }
00398 
00399 
00400 #if LIBAVFORMAT_BUILD <= 4628
00401   AVCodecContext* codec = &is_->fmt_cxt_->streams[is_->vid_index_]->codec;
00402 #else
00403   AVCodecContext* codec = is_->fmt_cxt_->streams[is_->vid_index_]->codec;
00404 #endif
00405 
00406   AVPacket pkt;
00407   int got_picture = 0;
00408 
00409   while ( got_picture == 0 )
00410   {
00411     if ( av_read_frame( is_->fmt_cxt_, &pkt ) < 0 ) {
00412       break;
00413     }
00414     is_->last_dts = pkt.dts;
00415 
00416     // Make sure that the packet is from the actual video stream.
00417     if (pkt.stream_index==is_->vid_index_)
00418     {
00419       if ( codec->codec_id == CODEC_ID_RAWVIDEO ) {
00420         if (!is_->contig_memory_)
00421           is_->raw_video_memory_ = new vil_memory_chunk(pkt.size, VIL_PIXEL_FORMAT_BYTE);
00422         else
00423           is_->raw_video_memory_->set_size(pkt.size, VIL_PIXEL_FORMAT_BYTE);
00424         vcl_memcpy(is_->raw_video_memory_->data(), pkt.data, pkt.size);
00425 
00426         avpicture_fill( (AVPicture*)is_->frame_,
00427                         reinterpret_cast<uint8_t*>(is_->raw_video_memory_->data()),
00428                         codec->pix_fmt,
00429                         codec->width,
00430                         codec->height );
00431         is_->frame_->pict_type = FF_I_TYPE;
00432         got_picture = 1;
00433       } else {
00434         avcodec_decode_video( codec,
00435                               is_->frame_, &got_picture,
00436                               pkt.data, pkt.size );
00437       }
00438     }
00439     av_free_packet( &pkt );
00440   }
00441 
00442   // From ffmpeg apiexample.c: some codecs, such as MPEG, transmit the
00443   // I and P frame with a latency of one frame. You must do the
00444   // following to have a chance to get the last frame of the video.
00445   if ( !got_picture ) {
00446     avcodec_decode_video( codec,
00447                           is_->frame_, &got_picture,
00448                           NULL, 0 );
00449 #if LIBAVFORMAT_BUILD <= 4623
00450       is_->last_dts += AV_TIME_BASE * is_->vid_str_->r_frame_rate_base / is_->vid_str_->r_frame_rate;
00451 #else
00452       is_->last_dts += int64_t(is_->vid_str_->time_base.den) * is_->vid_str_->r_frame_rate.den
00453                   / is_->vid_str_->time_base.num / is_->vid_str_->r_frame_rate.num;
00454 #endif
00455   }
00456 
00457   // The cached frame is out of date, whether we managed to get a new
00458   // frame or not.
00459   if (is_->cur_frame_)
00460     is_->cur_frame_->invalidate();
00461   is_->cur_frame_ = 0;
00462 
00463   if ( ! got_picture ) {
00464     is_->frame_->data[0] = NULL;
00465   }
00466 
00467   return got_picture != 0;
00468 }
00469 
00470 
00471 //: Read the next frame from the stream
00472 vidl_frame_sptr
00473 vidl_ffmpeg_istream::read_frame()
00474 {
00475   if (advance())
00476     return current_frame();
00477   return NULL;
00478 }
00479 
00480 
00481 //: Return the current frame in the stream
00482 vidl_frame_sptr
00483 vidl_ffmpeg_istream::current_frame()
00484 {
00485   // Quick return if the stream isn't valid
00486   if ( !is_valid() ) {
00487     return NULL;
00488   }
00489 #if LIBAVFORMAT_BUILD <= 4628
00490   AVCodecContext* enc = &is_->fmt_cxt_->streams[is_->vid_index_]->codec;
00491 #else
00492   AVCodecContext* enc = is_->fmt_cxt_->streams[is_->vid_index_]->codec;
00493 #endif
00494   // If we have not already converted this frame, try to convert it
00495   if ( !is_->cur_frame_ && is_->frame_->data[0] != 0 )
00496   {
00497     int width = enc->width;
00498     int height = enc->height;
00499 
00500     // Deinterlace if requested
00501     if ( is_->deinterlace_ ) {
00502       avpicture_deinterlace( (AVPicture*)is_->frame_, (AVPicture*)is_->frame_,
00503                              enc->pix_fmt, width, height );
00504     }
00505 
00506     // If the pixel format is not recognized by vidl then convert the data into RGB_24
00507     vidl_pixel_format fmt = vidl_pixel_format_from_ffmpeg(enc->pix_fmt);
00508     if (fmt == VIDL_PIXEL_FORMAT_UNKNOWN)
00509     {
00510       int size = width*height*3;
00511       if (!is_->contig_memory_)
00512         is_->contig_memory_ = new vil_memory_chunk(size, VIL_PIXEL_FORMAT_BYTE);
00513       else
00514         is_->contig_memory_->set_size(size, VIL_PIXEL_FORMAT_BYTE);
00515 
00516       AVPicture rgb_frame;
00517       avpicture_fill(&rgb_frame, (uint8_t*)is_->contig_memory_->data(), PIX_FMT_RGB24, width, height);
00518       img_convert(&rgb_frame, PIX_FMT_RGB24, (AVPicture*)is_->frame_, enc->pix_fmt, width, height);
00519       is_->cur_frame_ = new vidl_shared_frame(is_->contig_memory_->data(),width,height,
00520                                               VIDL_PIXEL_FORMAT_RGB_24);
00521     }
00522     else
00523     {
00524       // Test for contiguous memory.  Sometimes FFMPEG uses scanline buffers larger
00525       // than the image width.  The extra memory is used in optimized decoding routines.
00526       // This leads to a segmented image buffer, not supported by vidl.
00527       AVPicture test_frame;
00528       avpicture_fill(&test_frame, is_->frame_->data[0], enc->pix_fmt, width, height);
00529       if (test_frame.data[1] == is_->frame_->data[1] &&
00530           test_frame.data[2] == is_->frame_->data[2] &&
00531           test_frame.linesize[0] == is_->frame_->linesize[0] &&
00532           test_frame.linesize[1] == is_->frame_->linesize[1] &&
00533           test_frame.linesize[2] == is_->frame_->linesize[2] )
00534       {
00535         is_->cur_frame_ = new vidl_shared_frame(is_->frame_->data[0], width, height, fmt);
00536       }
00537       // Copy the image into contiguous memory.
00538       else
00539       {
00540         if (!is_->contig_memory_) {
00541           int size = avpicture_get_size( enc->pix_fmt, width, height );
00542           is_->contig_memory_ = new vil_memory_chunk(size, VIL_PIXEL_FORMAT_BYTE);
00543         }
00544         avpicture_fill(&test_frame, (uint8_t*)is_->contig_memory_->data(), enc->pix_fmt, width, height);
00545         img_copy(&test_frame, (AVPicture*)is_->frame_, enc->pix_fmt, width, height);
00546         // use a shared frame because the vil_memory_chunk is reused for each frame
00547         is_->cur_frame_ = new vidl_shared_frame(is_->contig_memory_->data(),width,height,fmt);
00548       }
00549     }
00550   }
00551 
00552   // The MPEG 2 codec has a latency of 1 frame, so the dts of the last
00553   // packet (stored in last_dts) is actually the next frame's
00554   // dts.
00555   if ( enc->codec_id == CODEC_ID_MPEG2VIDEO &&
00556        vcl_string("avi") == is_->fmt_cxt_->iformat->name ) {
00557     is_->frame_number_offset_ = 1;
00558   }
00559 
00560   return is_->cur_frame_;
00561 }
00562 
00563 
00564 //: Seek to the given frame number
00565 // \returns true if successful
00566 bool
00567 vidl_ffmpeg_istream::
00568 seek_frame(unsigned int frame)
00569 {
00570   // Quick return if the stream isn't open.
00571   if ( !is_open() ) {
00572     return false;
00573   }
00574 
00575 #if LIBAVFORMAT_BUILD <= 4623
00576   int64_t frame_size = int64_t(AV_TIME_BASE) * is_->vid_str_->r_frame_rate_base
00577                        / is_->vid_str_->r_frame_rate;
00578   int64_t req_timestamp = int64_t(AV_TIME_BASE) * frame * is_->vid_str_->r_frame_rate_base
00579                        / is_->vid_str_->r_frame_rate + is_->vid_str_->start_time;
00580 #else
00581   int64_t frame_size = int64_t(is_->vid_str_->time_base.den) * is_->vid_str_->r_frame_rate.den
00582                        / is_->vid_str_->time_base.num / is_->vid_str_->r_frame_rate.num;
00583   int64_t req_timestamp = int64_t(is_->vid_str_->time_base.den) * frame * is_->vid_str_->r_frame_rate.den
00584                        / is_->vid_str_->time_base.num / is_->vid_str_->r_frame_rate.num + is_->vid_str_->start_time;
00585 #endif
00586 
00587   if ( req_timestamp > frame_size/2 )
00588     req_timestamp -= frame_size/2;
00589   else
00590     req_timestamp = 0;
00591 
00592   // newer releases of ffmpeg may require a 4th argument to av_seek_frame
00593 #if LIBAVFORMAT_BUILD <= 4616
00594   int seek = av_seek_frame( is_->fmt_cxt_, is_->vid_index_, req_timestamp );
00595 #else
00596   int seek = av_seek_frame( is_->fmt_cxt_, is_->vid_index_, req_timestamp, AVSEEK_FLAG_BACKWARD );
00597 #endif
00598 
00599   if ( seek < 0 )
00600     return false;
00601   // We got to a key frame. Forward until we get to the frame we want.
00602   while ( true )
00603   {
00604     if ( ! advance() ) {
00605       return false;
00606     }
00607     if ( is_->last_dts >= req_timestamp ) {
00608       if ( is_->last_dts >= req_timestamp + frame_size ) {
00609         vcl_cerr << "Warning: seek went into the future!\n";
00610         return false;
00611       }
00612       return true;
00613     }
00614   }
00615 }
00616 
00617 #endif // vidl_ffmpeg_istream_v1_txx_